diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2547,6 +2547,13 @@
     setOperationAction(Opc, OrigVT, Promote);
     AddPromotedToType(Opc, OrigVT, DestVT);
   }
+  void setOperationPromotedToType(ArrayRef<unsigned> Ops, MVT OrigVT,
+                                  MVT DestVT) {
+    for (auto Op : Ops) {
+      setOperationAction(Op, OrigVT, Promote);
+      AddPromotedToType(Op, OrigVT, DestVT);
+    }
+  }

   /// Targets should invoke this method for each target independent node that
   /// they want to provide a custom DAG combiner for by implementing the
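The new overload above is pure convenience over the existing single-opcode form. A minimal usage sketch (the opcodes and types here are illustrative only, not taken from this patch):

```cpp
// Promote a batch of f16 vector opcodes to the matching f32 vector type in
// one call instead of one setOperationPromotedToType call per opcode.
static const unsigned FPOps[] = {ISD::FADD, ISD::FSUB, ISD::FMUL};
setOperationPromotedToType(FPOps, MVT::v4f16, MVT::v4f32);
// For each Op in FPOps this is equivalent to:
//   setOperationAction(Op, MVT::v4f16, Promote);
//   AddPromotedToType(Op, MVT::v4f16, MVT::v4f32);
```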
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5459,6 +5459,23 @@
     Results.push_back(NewAtomic.getValue(1));
     break;
   }
+  case ISD::SPLAT_VECTOR: {
+    SDValue Scalar = Node->getOperand(0);
+    MVT ScalarType = Scalar.getSimpleValueType();
+    MVT NewScalarType = NVT.getVectorElementType();
+    if (ScalarType.isInteger()) {
+      Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NewScalarType, Scalar);
+      Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+      Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2));
+      break;
+    }
+    Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewScalarType, Scalar);
+    Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+    Results.push_back(
+        DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2,
+                    DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+    break;
+  }
   }

   // Replace the original node with the legalized result.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -166,6 +166,8 @@
   /// truncated back to the original type.
   void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

+  void PromoteReduction(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
 public:
   VectorLegalizer(SelectionDAG& dag) :
       DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
@@ -551,6 +553,56 @@
   return true;
 }

+void VectorLegalizer::PromoteReduction(SDNode *Node,
+                                       SmallVectorImpl<SDValue> &Results) {
+  MVT VecVT = Node->getOperand(1).getSimpleValueType();
+  MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+  MVT ScalarVT = Node->getSimpleValueType(0);
+  MVT NewScalarVT = NewVecVT.getVectorElementType();
+
+  SDLoc DL(Node);
+  SmallVector<SDValue, 4> Operands(Node->getNumOperands());
+
+  for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
+    if (Node->getOperand(j).getValueType().isVector() &&
+        !(ISD::isVPOpcode(Node->getOpcode()) &&
+          ISD::getVPMaskIdx(Node->getOpcode()) == j))
+      if (Node->getOperand(j)
+              .getValueType()
+              .getVectorElementType()
+              .isFloatingPoint() &&
+          NewVecVT.isVector() &&
+          NewVecVT.getVectorElementType().isFloatingPoint())
+        Operands[j] =
+            DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+      else
+        Operands[j] =
+            DAG.getNode(ISD::BITCAST, DL, NewVecVT, Node->getOperand(j));
+    else if (Node->getOperand(j).getValueType() == ScalarVT &&
+             ISD::getVPExplicitVectorLengthIdx(Node->getOpcode()) != j)
+      // Promote the initial value.
+      if (Node->getOperand(j).getValueType().isFloatingPoint())
+        Operands[j] =
+            DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(j));
+      else
+        Operands[j] =
+            DAG.getNode(ISD::ANY_EXTEND, DL, NewScalarVT, Node->getOperand(j));
+    else
+      Operands[j] = Node->getOperand(j);
+  }
+
+  SDValue Res = DAG.getNode(Node->getOpcode(), DL, NewScalarVT, Operands,
+                            Node->getFlags());
+
+  if (ScalarVT.isFloatingPoint())
+    Res = DAG.getNode(ISD::FP_ROUND, DL, ScalarVT, Res,
+                      DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+  else
+    Res = DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, Res);
+
+  Results.push_back(Res);
+}
+
 void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
   // For a few operations there is a specific concept for promotion based on
   // the operand's type.
@@ -569,6 +621,23 @@
     // Promote the operation by extending the operand.
     PromoteFP_TO_INT(Node, Results);
     return;
+  case ISD::VP_REDUCE_ADD:
+  case ISD::VP_REDUCE_MUL:
+  case ISD::VP_REDUCE_AND:
+  case ISD::VP_REDUCE_OR:
+  case ISD::VP_REDUCE_XOR:
+  case ISD::VP_REDUCE_SMAX:
+  case ISD::VP_REDUCE_SMIN:
+  case ISD::VP_REDUCE_UMAX:
+  case ISD::VP_REDUCE_UMIN:
+  case ISD::VP_REDUCE_FADD:
+  case ISD::VP_REDUCE_FMUL:
+  case ISD::VP_REDUCE_FMAX:
+  case ISD::VP_REDUCE_FMIN:
+  case ISD::VP_REDUCE_SEQ_FADD:
+    // Promote the operation by extending the operand.
+    PromoteReduction(Node, Results);
+    return;
   case ISD::FP_ROUND:
   case ISD::FP_EXTEND:
     // These operations are used to do promotion so they can't be promoted
@@ -589,7 +658,10 @@
   SmallVector<SDValue, 4> Operands(Node->getNumOperands());

   for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
-    if (Node->getOperand(j).getValueType().isVector())
+    // Do not promote the mask operand of a VP op.
+    bool SkipPromote = (ISD::isVPOpcode(Node->getOpcode()) &&
+                        ISD::getVPMaskIdx(Node->getOpcode()) == j);
+    if (Node->getOperand(j).getValueType().isVector() && !SkipPromote)
       if (Node->getOperand(j)
               .getValueType()
               .getVectorElementType()
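To make the dataflow of PromoteReduction concrete: the start value and vector operand are extended to the promoted types, the reduction is performed at the wider type, and the scalar result is converted back. A sketch under assumed types (a VP_REDUCE_FADD whose nxv1f16 operand is promoted to nxv1f32; the value names StartF16, VecF16, Mask, and EVL are invented for illustration, and the mask and EVL operands pass through unchanged):

```cpp
// Input:  f16 = vp_reduce_fadd f16 %start, nxv1f16 %vec, %mask, %evl
SDValue Start = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, StartF16);
SDValue Vec = DAG.getNode(ISD::FP_EXTEND, DL, MVT::nxv1f32, VecF16);
SDValue Red = DAG.getNode(ISD::VP_REDUCE_FADD, DL, MVT::f32,
                          {Start, Vec, Mask, EVL}, Node->getFlags());
// The widened f32 scalar result is rounded back to the original f16 type.
SDValue Res = DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Red,
                          DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
```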
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -813,6 +813,25 @@
       ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
   };

+  // TODO: support more ops.
+  static const unsigned ZvfhminPromoteOps[] = {
+      ISD::FMINNUM,    ISD::FMAXNUM,    ISD::FADD,        ISD::FSUB,
+      ISD::FMUL,       ISD::FMA,        ISD::FDIV,        ISD::FSQRT,
+      ISD::FABS,       ISD::FNEG,       ISD::FCOPYSIGN,   ISD::FCEIL,
+      ISD::FFLOOR,     ISD::FROUND,     ISD::FROUNDEVEN,  ISD::FRINT,
+      ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SPLAT_VECTOR};
+
+  // TODO: support more vp ops.
+  static const unsigned ZvfhminPromoteVPOps[] = {
+      ISD::VP_FADD,        ISD::VP_FSUB,         ISD::VP_FMUL,
+      ISD::VP_FDIV,        ISD::VP_FNEG,         ISD::VP_FABS,
+      ISD::VP_FMA,         ISD::VP_REDUCE_FADD,  ISD::VP_REDUCE_SEQ_FADD,
+      ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,  ISD::VP_SQRT,
+      ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,      ISD::VP_FCEIL,
+      ISD::VP_FFLOOR,      ISD::VP_FROUND,       ISD::VP_FROUNDEVEN,
+      ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
+      ISD::VP_FNEARBYINT};
+
   // Sets common operation actions on RVV floating-point vector types.
   const auto SetCommonVFPActions = [&](MVT VT) {
     setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
@@ -908,7 +927,22 @@
         continue;
       setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
       setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
-      // TODO: make others promote?
+      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+                         VT, Custom);
+      // Load/store are custom-lowered as well.
+      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+
+      // Custom split nxv32f16, since nxv32f32 is not a legal type.
+      if (VT == MVT::nxv32f16) {
+        setOperationAction(ZvfhminPromoteOps, VT, Custom);
+        setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
+        continue;
+      }
+      // Promote the remaining f16 vector ops to f32.
+      MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+      setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
+      setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
     }
   }
@@ -1089,7 +1123,18 @@
         !Subtarget.hasVInstructionsF16()) {
       setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
       setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
-      // TODO: make others promote?
+      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+                         VT, Custom);
+      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+      MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+      // Don't promote f16 vector operations to f32 if the f32 vector type is
+      // not legal.
+      // TODO: could split the f16 vector into two vectors and do promotion.
+      if (!isTypeLegal(F32VecVT))
+        continue;
+      setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
+      setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
       continue;
     }
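Why nxv32f16 takes the Custom path rather than the promotion path: its promoted counterpart would be nxv32f32, which would need LMUL=16 and so is not a legal RVV type (LMUL=8 tops out at nxv16f32), hence the operations are custom-lowered by splitting instead. A sketch of the legality reasoning the two loops above rely on, with the type hard-coded for illustration (inside the RISCVTargetLowering constructor, where isTypeLegal is available):

```cpp
// Every smaller f16 vector type promotes cleanly, e.g. nxv1f16 -> nxv1f32
// (mf2) up through nxv16f16 -> nxv16f32 (m8). The widest one does not:
MVT VT = MVT::nxv32f16;
MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
// F32VecVT == MVT::nxv32f32, which exceeds LMUL=8, so promotion is
// unavailable and the ops are instead split into two nxv16f16 halves.
assert(F32VecVT == MVT::nxv32f32 && !isTypeLegal(F32VecVT));
```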
@@ -4870,6 +4915,72 @@
   return false;
 }

+static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
+  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
+  SDLoc DL(Op);
+
+  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
+  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
+
+  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+    if (!Op.getOperand(j).getValueType().isVector()) {
+      LoOperands[j] = Op.getOperand(j);
+      HiOperands[j] = Op.getOperand(j);
+      continue;
+    }
+    std::tie(LoOperands[j], HiOperands[j]) =
+        DAG.SplitVector(Op.getOperand(j), DL);
+  }
+
+  SDValue LoRes =
+      DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
+  SDValue HiRes =
+      DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
+}
+
+static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
+  assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
+  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
+  SDLoc DL(Op);
+
+  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
+  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
+
+  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+    if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
+      std::tie(LoOperands[j], HiOperands[j]) =
+          DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
+      continue;
+    }
+    std::tie(LoOperands[j], HiOperands[j]) =
+        DAG.SplitVector(Op.getOperand(j), DL);
+  }
+
+  SDValue LoRes =
+      DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
+  SDValue HiRes =
+      DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
+}
+
+static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
+  SDLoc DL(Op);
+
+  auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
+  auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
+  auto [EVLLo, EVLHi] =
+      DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
+
+  SDValue ResLo =
+      DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
+                  {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
+  return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
+                     {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
+}
+
 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
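One subtlety worth calling out in SplitVectorReductionOp above: the scalar result of the low-half reduction is fed back in as the start value of the high-half reduction, which is what keeps ordered reductions such as VP_REDUCE_SEQ_FADD sequential across the split. Spelled out with the patch's own operand names (the f16 result type is an illustrative assumption):

```cpp
// ResLo reduces the low half starting from the original accumulator ...
SDValue ResLo = DAG.getNode(ISD::VP_REDUCE_SEQ_FADD, DL, MVT::f16,
                            {Start, Lo, MaskLo, EVLLo}, Op->getFlags());
// ... and the high half continues from ResLo, so the combined result equals
// one ordered reduction over the unsplit vector.
return DAG.getNode(ISD::VP_REDUCE_SEQ_FADD, DL, MVT::f16,
                   {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
```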
@@ -5373,6 +5484,10 @@
   case ISD::VP_REDUCE_SEQ_FADD:
   case ISD::VP_REDUCE_FMIN:
   case ISD::VP_REDUCE_FMAX:
+    if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
+        (Subtarget.hasVInstructionsF16Minimal() &&
+         !Subtarget.hasVInstructionsF16()))
+      return SplitVectorReductionOp(Op, DAG);
     return lowerVPREDUCE(Op, DAG);
   case ISD::VP_REDUCE_AND:
   case ISD::VP_REDUCE_OR:
@@ -5402,6 +5517,10 @@
   case ISD::BUILD_VECTOR:
     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
   case ISD::SPLAT_VECTOR:
+    if (Op.getValueType() == MVT::nxv32f16 &&
+        (Subtarget.hasVInstructionsF16Minimal() &&
+         !Subtarget.hasVInstructionsF16()))
+      return SplitVectorOp(Op, DAG);
     if (Op.getValueType().getVectorElementType() == MVT::i1)
       return lowerVectorMaskSplat(Op, DAG);
     return SDValue();
@@ -5521,10 +5640,6 @@
     assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
            "Unexpected custom legalisation");
     return SDValue();
-  case ISD::SADDSAT:
-  case ISD::UADDSAT:
-  case ISD::SSUBSAT:
-  case ISD::USUBSAT:
   case ISD::FADD:
   case ISD::FSUB:
   case ISD::FMUL:
@@ -5533,12 +5648,21 @@
   case ISD::FABS:
   case ISD::FSQRT:
   case ISD::FMA:
+  case ISD::FMINNUM:
+  case ISD::FMAXNUM:
+    if (Op.getValueType() == MVT::nxv32f16 &&
+        (Subtarget.hasVInstructionsF16Minimal() &&
+         !Subtarget.hasVInstructionsF16()))
+      return SplitVectorOp(Op, DAG);
+    [[fallthrough]];
+  case ISD::SADDSAT:
+  case ISD::UADDSAT:
+  case ISD::SSUBSAT:
+  case ISD::USUBSAT:
   case ISD::SMIN:
   case ISD::SMAX:
   case ISD::UMIN:
   case ISD::UMAX:
-  case ISD::FMINNUM:
-  case ISD::FMAXNUM:
     return lowerToScalableOp(Op, DAG);
   case ISD::ABS:
   case ISD::VP_ABS:
@@ -5550,6 +5674,10 @@
   case ISD::VSELECT:
     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
   case ISD::FCOPYSIGN:
+    if (Op.getValueType() == MVT::nxv32f16 &&
+        (Subtarget.hasVInstructionsF16Minimal() &&
+         !Subtarget.hasVInstructionsF16()))
+      return SplitVectorOp(Op, DAG);
     return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
   case ISD::STRICT_FADD:
   case ISD::STRICT_FSUB:
@@ -5595,9 +5723,6 @@
   case ISD::VP_OR:
   case ISD::VP_XOR:
     return lowerLogicVPOp(Op, DAG);
-  case ISD::VP_ASHR:
-  case ISD::VP_LSHR:
-  case ISD::VP_SHL:
   case ISD::VP_FADD:
   case ISD::VP_FSUB:
   case ISD::VP_FMUL:
@@ -5609,6 +5734,14 @@
   case ISD::VP_FMINNUM:
   case ISD::VP_FMAXNUM:
   case ISD::VP_FCOPYSIGN:
+    if (Op.getValueType() == MVT::nxv32f16 &&
+        (Subtarget.hasVInstructionsF16Minimal() &&
+         !Subtarget.hasVInstructionsF16()))
+      return SplitVPOp(Op, DAG);
+    [[fallthrough]];
+  case ISD::VP_ASHR:
+  case ISD::VP_LSHR:
+  case ISD::VP_SHL:
     return lowerVPOp(Op, DAG);
   case ISD::VP_SIGN_EXTEND:
   case ISD::VP_ZERO_EXTEND:
@@ -5659,6 +5792,10 @@
   case ISD::VP_FROUND:
   case ISD::VP_FROUNDEVEN:
   case ISD::VP_FROUNDTOZERO:
+    if (Op.getValueType() == MVT::nxv32f16 &&
+        (Subtarget.hasVInstructionsF16Minimal() &&
+         !Subtarget.hasVInstructionsF16()))
+      return SplitVPOp(Op, DAG);
     return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
   }
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -885,15 +885,18 @@
 foreach vti = !listconcat(FractionalGroupIntegerVectors,
                           FractionalGroupFloatVectors,
                           FractionalGroupBFloatVectors) in
-  let Predicates = GetVTypePredicates<vti>.Predicates in
+  let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
+                       GetVTypePredicates<vti>.Predicates) in
   defm : VPatUSLoadStoreSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
                                vti.AVL, vti.RegClass>;
 foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VBF16M1, VF16M1,
                VF32M1, VF64M1] in
-  let Predicates = GetVTypePredicates<vti>.Predicates in
+  let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
+                       GetVTypePredicates<vti>.Predicates) in
   defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
                                       vti.RegClass>;
 foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors,
                           GroupBFloatVectors) in
-  let Predicates = GetVTypePredicates<vti>.Predicates in
+  let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
+                       GetVTypePredicates<vti>.Predicates) in
   defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
                                       vti.RegClass>;

 foreach mti = AllMasks in
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -1,8 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; 
RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN declare <2 x half> @llvm.vp.ceil.v2f16(<2 x half>, <2 x i1>, i32) @@ -23,6 +27,27 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -42,6 +67,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) @@ -67,6 +111,27 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; 
ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -86,6 +151,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -111,6 +195,29 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -130,6 +237,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -157,6 +283,29 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; 
ZVFHMIN-LABEL: vp_ceil_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -176,6 +325,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -201,6 +369,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -220,6 +405,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, 
i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -245,6 +445,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -264,6 +481,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -291,6 +523,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -310,6 +561,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 
x i32> zeroinitializer %v = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -337,6 +603,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -356,6 +641,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -381,6 +681,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -400,6 +717,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> 
zeroinitializer %v = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -427,6 +759,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -446,6 +797,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -473,6 +839,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -492,6 +877,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %head = 
insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -519,6 +919,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v15f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } @@ -538,6 +957,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v15f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -565,6 +999,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI24_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -584,6 +1037,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI25_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -661,6 +1129,75 @@ ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v32f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v25, v0 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: vslidedown.vi v1, v0, 2 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB26_2: +; ZVFHMIN-NEXT: lui a2, %hi(.LCPI26_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a1, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmflt.vf v1, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } @@ -699,6 +1236,40 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_ceil_v32f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; 
ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB27_2: +; ZVFHMIN-NEXT: lui a2, %hi(.LCPI27_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a1, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN declare <2 x half> @llvm.vp.floor.v2f16(<2 x half>, <2 x i1>, i32) @@ -23,6 +27,27 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -42,6 +67,25 @@ ; CHECK-NEXT: vsetvli 
zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) @@ -67,6 +111,27 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -86,6 +151,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -111,6 +195,29 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, 
m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -130,6 +237,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -157,6 +283,29 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -176,6 +325,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; 
ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -201,6 +369,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -220,6 +405,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -245,6 +445,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -264,6 +481,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x 
i32> zeroinitializer %v = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -291,6 +523,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -310,6 +561,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -337,6 +603,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -356,6 +641,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> 
%head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -381,6 +681,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -400,6 +717,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -427,6 +759,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -446,6 +797,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, 
i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -473,6 +839,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -492,6 +877,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -519,6 +919,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v15f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } @@ -538,6 +957,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v15f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, 
mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -565,6 +999,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI24_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -584,6 +1037,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI25_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -661,6 +1129,75 @@ ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v32f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v25, v0 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: vslidedown.vi v1, v0, 2 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB26_2: +; ZVFHMIN-NEXT: lui a2, %hi(.LCPI26_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a1, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: vfcvt.x.f.v 
v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmflt.vf v1, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } @@ -699,6 +1236,40 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_floor_v32f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB27_2: +; ZVFHMIN-NEXT: lui a2, %hi(.LCPI27_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a1, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1,22 +1,45 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < 
%s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX2 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,LMULMAX1,LMULMAX1-RV64 + +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d 
-mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-vector-bits-min=256 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-vector-bits-min=256 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-vector-bits-min=256 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-vector-bits-min=256 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+d -riscv-v-vector-bits-min=256 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+d -riscv-v-vector-bits-min=256 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX2,ZVFHMINLMULMAX2-RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+d -riscv-v-vector-bits-min=256 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+d -riscv-v-vector-bits-min=256 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMINLMULMAX1-RV64 define void @fadd_v8f16(ptr %x, ptr %y) { -; CHECK-LABEL: fadd_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fadd_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vfadd.vv v8, v8, v9 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fadd_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = fadd <8 x half> %a, %b @@ -25,16 +48,90 @@ } define void @fadd_v6f16(ptr %x, ptr %y) { -; CHECK-LABEL: fadd_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fadd_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; 
ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v9 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fadd_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fadd_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fadd_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fadd_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; 
ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fadd <6 x half> %a, %b @@ -43,14 +140,23 @@ } define void @fadd_v4f32(ptr %x, ptr %y) { -; CHECK-LABEL: fadd_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fadd_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vfadd.vv v8, v8, v9 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fadd_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = fadd <4 x float> %a, %b @@ -75,14 +181,28 @@ } define void @fsub_v8f16(ptr %x, ptr %y) { -; CHECK-LABEL: fsub_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fsub_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vfsub.vv v8, v8, v9 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fsub_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = fsub <8 x half> %a, %b @@ -91,16 +211,90 @@ } define void @fsub_v6f16(ptr %x, ptr %y) { -; CHECK-LABEL: fsub_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fsub_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v9 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fsub_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; 
ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fsub_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fsub_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fsub_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fsub <6 x half> %a, %b @@ -109,14 +303,23 @@ } define void @fsub_v4f32(ptr %x, ptr %y) { -; CHECK-LABEL: fsub_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: vse32.v 
v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fsub_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vfsub.vv v8, v8, v9 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fsub_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = fsub <4 x float> %a, %b @@ -141,14 +344,28 @@ } define void @fmul_v8f16(ptr %x, ptr %y) { -; CHECK-LABEL: fmul_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmul_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vfmul.vv v8, v8, v9 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fmul_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = fmul <8 x half> %a, %b @@ -157,16 +374,90 @@ } define void @fmul_v6f16(ptr %x, ptr %y) { -; CHECK-LABEL: fmul_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmul_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v9 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fmul_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fmul_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, 
mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fmul_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fmul_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fmul <6 x half> %a, %b @@ -175,14 +466,23 @@ } define void @fmul_v4f32(ptr %x, ptr %y) { -; CHECK-LABEL: fmul_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmul_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vfmul.vv v8, v8, v9 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fmul_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = fmul <4 x float> %a, %b @@ -207,14 +507,28 @@ } define 
void @fdiv_v8f16(ptr %x, ptr %y) { -; CHECK-LABEL: fdiv_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fdiv_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vfdiv.vv v8, v8, v9 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fdiv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = fdiv <8 x half> %a, %b @@ -223,16 +537,90 @@ } define void @fdiv_v6f16(ptr %x, ptr %y) { -; CHECK-LABEL: fdiv_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fdiv_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v9 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; 
ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fdiv <6 x half> %a, %b @@ -241,14 +629,23 @@ } define void @fdiv_v4f32(ptr %x, ptr %y) { -; CHECK-LABEL: fdiv_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fdiv_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vfdiv.vv v8, v8, v9 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fdiv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = fdiv <4 x float> %a, %b @@ -273,13 +670,25 @@ } define void @fneg_v8f16(ptr %x) { -; CHECK-LABEL: fneg_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fneg_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fneg_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, 
m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = fneg <8 x half> %a store <8 x half> %b, ptr %x @@ -287,15 +696,81 @@ } define void @fneg_v6f16(ptr %x) { -; CHECK-LABEL: fneg_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fneg_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fneg_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fneg_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fneg_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fneg_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; 
ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = fneg <6 x half> %a store <6 x half> %b, ptr %x @@ -303,13 +778,21 @@ } define void @fneg_v4f32(ptr %x) { -; CHECK-LABEL: fneg_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fneg_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fneg_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = fneg <4 x float> %a store <4 x float> %b, ptr %x @@ -331,13 +814,25 @@ } define void @fabs_v8f16(ptr %x) { -; CHECK-LABEL: fabs_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fabs_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fabs_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x @@ -346,15 +841,81 @@ declare <8 x half> @llvm.fabs.v8f16(<8 x half>) define void @fabs_v6f16(ptr %x) { -; CHECK-LABEL: fabs_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fabs_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fabs_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; 
ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fabs_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fabs_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fabs_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -363,13 +924,21 @@ declare <6 x half> @llvm.fabs.v6f16(<6 x half>) define void @fabs_v4f32(ptr %x) { -; CHECK-LABEL: fabs_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fabs_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fabs_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfabs.v v8, v8 +; 
ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) store <4 x float> %b, ptr %x @@ -393,14 +962,28 @@ declare <2 x double> @llvm.fabs.v2f64(<2 x double>) define void @copysign_v8f16(ptr %x, ptr %y) { -; CHECK-LABEL: copysign_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: copysign_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: copysign_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) @@ -410,16 +993,90 @@ declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>) define void @copysign_v6f16(ptr %x, ptr %y) { -; CHECK-LABEL: copysign_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: copysign_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: copysign_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: copysign_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, 
e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: copysign_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: copysign_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b) @@ -429,14 +1086,23 @@ declare <6 x half> @llvm.copysign.v6f16(<6 x half>, <6 x half>) define void @copysign_v4f32(ptr %x, ptr %y) { -; CHECK-LABEL: copysign_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: copysign_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: copysign_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) @@ -463,13 +1129,32 @@ declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) define void 
@copysign_vf_v8f16(ptr %x, half %y) { -; CHECK-LABEL: copysign_vf_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: copysign_vf_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: copysign_vf_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer @@ -479,15 +1164,109 @@ } define void @copysign_vf_v6f16(ptr %x, half %y) { -; CHECK-LABEL: copysign_vf_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: copysign_vf_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: copysign_vf_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: copysign_vf_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 +; 
ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: copysign_vf_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: copysign_vf_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -497,13 +1276,21 @@ } define void @copysign_vf_v4f32(ptr %x, float %y) 
{ -; CHECK-LABEL: copysign_vf_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: copysign_vf_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: copysign_vf_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfsgnj.vf v8, v8, fa0 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer @@ -529,14 +1316,33 @@ } define void @copysign_neg_v8f16(ptr %x, ptr %y) { -; CHECK-LABEL: copysign_neg_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: copysign_neg_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: copysign_neg_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = fneg <8 x half> %b @@ -546,16 +1352,110 @@ } define void @copysign_neg_v6f16(ptr %x, ptr %y) { -; CHECK-LABEL: copysign_neg_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: copysign_neg_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: copysign_neg_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: 
vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: copysign_neg_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: copysign_neg_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: copysign_neg_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: 
vfncvt.f.f.w v10, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fneg <6 x half> %b @@ -565,14 +1465,23 @@ } define void @copysign_neg_v4f32(ptr %x, ptr %y) { -; CHECK-LABEL: copysign_neg_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: copysign_neg_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: copysign_neg_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfsgnjn.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = fneg <4 x float> %b @@ -599,15 +1508,35 @@ } define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { -; CHECK-LABEL: copysign_neg_trunc_v4f16_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle32.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vfncvt.f.f.w v10, v8 -; CHECK-NEXT: vfsgnjn.vv v8, v9, v10 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: copysign_neg_trunc_v4f16_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFH-NEXT: vle32.v v8, (a1) +; ZVFH-NEXT: vle16.v v9, (a0) +; ZVFH-NEXT: vfncvt.f.f.w v10, v8 +; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x half>, ptr %x %b = load <4 x float>, ptr %y %c = fneg <4 x float> %b @@ -619,17 +1548,145 @@ declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { -; CHECK-LABEL: copysign_neg_trunc_v3f16_v3f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma -; CHECK-NEXT: vle32.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: 
vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v8 -; CHECK-NEXT: vfsgnjn.vv v8, v9, v10 -; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma +; ZVFH-NEXT: vle32.v v8, (a1) +; ZVFH-NEXT: vle16.v v9, (a0) +; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v8 +; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10 +; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: addi sp, sp, -16 +; ZVFHMINLMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle32.v v9, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v8, v10, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, sp, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: flh fa5, 12(sp) +; ZVFHMINLMULMAX2-RV32-NEXT: fsh fa5, 4(a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: addi sp, sp, 16 +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: addi sp, sp, -16 +; ZVFHMINLMULMAX2-RV64-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle64.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: mv a2, sp +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a2) +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2) +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle32.v v9, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v8, v10, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a1, sp, 8 +; 
ZVFHMINLMULMAX2-RV64-NEXT: vse16.v v9, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: flh fa5, 12(sp) +; ZVFHMINLMULMAX2-RV64-NEXT: fsh fa5, 4(a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: addi sp, sp, 16 +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: addi sp, sp, -16 +; ZVFHMINLMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle32.v v9, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v8, v10, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, sp, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: flh fa5, 12(sp) +; ZVFHMINLMULMAX1-RV32-NEXT: fsh fa5, 4(a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: addi sp, sp, 16 +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: addi sp, sp, -16 +; ZVFHMINLMULMAX1-RV64-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle64.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: mv a2, sp +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a2) +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2) +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle32.v v9, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v8, v10, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a1, sp, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse16.v v9, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: flh fa5, 12(sp) +; ZVFHMINLMULMAX1-RV64-NEXT: fsh fa5, 4(a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: addi sp, sp, 16 +; 
ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <3 x half>, ptr %x %b = load <3 x float>, ptr %y %c = fneg <3 x float> %b @@ -661,13 +1718,25 @@ } define void @sqrt_v8f16(ptr %x) { -; CHECK-LABEL: sqrt_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: sqrt_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: sqrt_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x @@ -676,15 +1745,81 @@ declare <8 x half> @llvm.sqrt.v8f16(<8 x half>) define void @sqrt_v6f16(ptr %x) { -; CHECK-LABEL: sqrt_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: sqrt_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: sqrt_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfsqrt.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: sqrt_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfsqrt.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: sqrt_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma 
+; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfsqrt.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: sqrt_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfsqrt.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -693,13 +1828,21 @@ declare <6 x half> @llvm.sqrt.v6f16(<6 x half>) define void @sqrt_v4f32(ptr %x) { -; CHECK-LABEL: sqrt_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: sqrt_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: sqrt_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfsqrt.v v8, v8 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) store <4 x float> %b, ptr %x @@ -723,15 +1866,31 @@ declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) define void @fma_v8f16(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fma_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vfmacc.vv v10, v8, v9 -; CHECK-NEXT: vse16.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fma_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vle16.v v10, (a2) +; ZVFH-NEXT: vfmacc.vv v10, v8, v9 +; ZVFH-NEXT: vse16.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fma_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a2) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, 
ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = load <8 x half>, ptr %z @@ -742,17 +1901,99 @@ declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) define void @fma_v6f16(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fma_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfmacc.vv v10, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fma_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vle16.v v10, (a2) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfmacc.vv v10, v8, v9 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fma_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a2) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v9, v8, v11 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fma_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v9, v8, v11 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fma_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a2) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, 
zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v9, v8, v11 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fma_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v9, v8, v11 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -763,15 +2004,25 @@ declare <6 x half> @llvm.fma.v6f16(<6 x half>, <6 x half>, <6 x half>) define void @fma_v4f32(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fma_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vle32.v v10, (a2) -; CHECK-NEXT: vfmacc.vv v10, v8, v9 -; CHECK-NEXT: vse32.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fma_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vle32.v v10, (a2) +; ZVFH-NEXT: vfmacc.vv v10, v8, v9 +; ZVFH-NEXT: vse32.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fma_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vle32.v v10, (a2) +; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9 +; ZVFHMIN-NEXT: vse32.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = load <4 x float>, ptr %z @@ -801,15 +2052,36 @@ declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fmsub_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vfmsac.vv v10, v8, v9 -; CHECK-NEXT: vse16.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmsub_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vle16.v v10, (a2) +; ZVFH-NEXT: vfmsac.vv v10, v8, v9 +; ZVFH-NEXT: vse16.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fmsub_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a2) +; 
ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = load <8 x half>, ptr %z @@ -820,17 +2092,119 @@ } define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fmsub_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfmsac.vv v10, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmsub_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vle16.v v10, (a2) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfmsac.vv v10, v8, v9 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a2) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v8, v11 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v11 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmacc.vv v10, v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a2) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v8, v11 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; 
ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v11 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmacc.vv v10, v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a2) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v8, v11 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v11 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmacc.vv v10, v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a2) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v8, v11 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v11 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -841,15 +2215,25 @@ } define void @fnmsub_v4f32(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fnmsub_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vle32.v v10, (a2) -; CHECK-NEXT: vfnmsac.vv v10, v8, v9 -; CHECK-NEXT: 
vse32.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fnmsub_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vle32.v v10, (a2) +; ZVFH-NEXT: vfnmsac.vv v10, v8, v9 +; ZVFH-NEXT: vse32.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fnmsub_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vle32.v v10, (a2) +; ZVFHMIN-NEXT: vfnmsac.vv v10, v8, v9 +; ZVFHMIN-NEXT: vse32.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = load <4 x float>, ptr %z @@ -918,6 +2302,20 @@ ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMINLMULMAX2-LABEL: fadd_v16f16: +; ZVFHMINLMULMAX2: # %bb.0: +; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfadd.vv v8, v12, v10 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) +; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fadd <16 x half> %a, %b @@ -964,6 +2362,15 @@ ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMIN-LABEL: fadd_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y %c = fadd <8 x float> %a, %b @@ -1010,6 +2417,15 @@ ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMIN-LABEL: fadd_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZVFHMIN-NEXT: vle64.v v8, (a0) +; ZVFHMIN-NEXT: vle64.v v9, (a1) +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse64.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = load <4 x double>, ptr %y %c = fadd <4 x double> %a, %b @@ -1056,6 +2472,20 @@ ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMINLMULMAX2-LABEL: fsub_v16f16: +; ZVFHMINLMULMAX2: # %bb.0: +; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfsub.vv v8, v12, v10 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) +; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fsub <16 x half> %a, %b @@ -1102,6 +2532,15 @@ ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMIN-LABEL: fsub_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, 
(a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y %c = fsub <8 x float> %a, %b @@ -1148,6 +2587,15 @@ ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMIN-LABEL: fsub_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZVFHMIN-NEXT: vle64.v v8, (a0) +; ZVFHMIN-NEXT: vle64.v v9, (a1) +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse64.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = load <4 x double>, ptr %y %c = fsub <4 x double> %a, %b @@ -1194,6 +2642,20 @@ ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMINLMULMAX2-LABEL: fmul_v16f16: +; ZVFHMINLMULMAX2: # %bb.0: +; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfmul.vv v8, v12, v10 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) +; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fmul <16 x half> %a, %b @@ -1240,6 +2702,15 @@ ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMIN-LABEL: fmul_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y %c = fmul <8 x float> %a, %b @@ -1286,6 +2757,15 @@ ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMIN-LABEL: fmul_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZVFHMIN-NEXT: vle64.v v8, (a0) +; ZVFHMIN-NEXT: vle64.v v9, (a1) +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse64.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = load <4 x double>, ptr %y %c = fmul <4 x double> %a, %b @@ -1332,6 +2812,20 @@ ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMINLMULMAX2-LABEL: fdiv_v16f16: +; ZVFHMINLMULMAX2: # %bb.0: +; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfdiv.vv v8, v12, v10 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) +; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = fdiv <16 x half> %a, %b @@ -1378,6 +2872,15 @@ ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMIN-LABEL: fdiv_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) 
+; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y %c = fdiv <8 x float> %a, %b @@ -1424,6 +2927,15 @@ ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret +; +; ZVFHMIN-LABEL: fdiv_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZVFHMIN-NEXT: vle64.v v8, (a0) +; ZVFHMIN-NEXT: vle64.v v9, (a1) +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse64.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = load <4 x double>, ptr %y %c = fdiv <4 x double> %a, %b @@ -1451,6 +2963,18 @@ ; LMULMAX1-NEXT: vse16.v v9, (a0) ; LMULMAX1-NEXT: vse16.v v8, (a1) ; LMULMAX1-NEXT: ret +; +; ZVFHMINLMULMAX2-LABEL: fneg_v16f16: +; ZVFHMINLMULMAX2: # %bb.0: +; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfneg.v v8, v10 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) +; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = fneg <16 x half> %a store <16 x half> %b, ptr %x @@ -1477,6 +3001,14 @@ ; LMULMAX1-NEXT: vse32.v v9, (a0) ; LMULMAX1-NEXT: vse32.v v8, (a1) ; LMULMAX1-NEXT: ret +; +; ZVFHMIN-LABEL: fneg_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x float>, ptr %x %b = fneg <8 x float> %a store <8 x float> %b, ptr %x @@ -1503,6 +3035,14 @@ ; LMULMAX1-NEXT: vse64.v v9, (a0) ; LMULMAX1-NEXT: vse64.v v8, (a1) ; LMULMAX1-NEXT: ret +; +; ZVFHMIN-LABEL: fneg_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZVFHMIN-NEXT: vle64.v v8, (a0) +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: vse64.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = fneg <4 x double> %a store <4 x double> %b, ptr %x @@ -1537,6 +3077,22 @@ ; LMULMAX1-NEXT: vse16.v v13, (a0) ; LMULMAX1-NEXT: vse16.v v12, (a3) ; LMULMAX1-NEXT: ret +; +; ZVFHMINLMULMAX2-LABEL: fma_v16f16: +; ZVFHMINLMULMAX2: # %bb.0: +; ZVFHMINLMULMAX2-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vle16.v v8, (a2) +; ZVFHMINLMULMAX2-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-NEXT: vle16.v v10, (a1) +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMINLMULMAX2-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfmadd.vv v8, v14, v12 +; ZVFHMINLMULMAX2-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMINLMULMAX2-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMINLMULMAX2-NEXT: vse16.v v10, (a0) +; ZVFHMINLMULMAX2-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y %c = load <16 x half>, ptr %z @@ -1574,6 +3130,16 @@ ; LMULMAX1-NEXT: vse32.v v13, (a0) ; LMULMAX1-NEXT: vse32.v v12, (a3) ; LMULMAX1-NEXT: ret +; +; ZVFHMIN-LABEL: fma_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vle32.v v10, (a2) +; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9 +; ZVFHMIN-NEXT: vse32.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load 
<8 x float>, ptr %x %b = load <8 x float>, ptr %y %c = load <8 x float>, ptr %z @@ -1611,6 +3177,16 @@ ; LMULMAX1-NEXT: vse64.v v13, (a0) ; LMULMAX1-NEXT: vse64.v v12, (a3) ; LMULMAX1-NEXT: ret +; +; ZVFHMIN-LABEL: fma_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZVFHMIN-NEXT: vle64.v v8, (a0) +; ZVFHMIN-NEXT: vle64.v v9, (a1) +; ZVFHMIN-NEXT: vle64.v v10, (a2) +; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9 +; ZVFHMIN-NEXT: vse64.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x double>, ptr %x %b = load <4 x double>, ptr %y %c = load <4 x double>, ptr %z @@ -1621,13 +3197,32 @@ declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) define void @fadd_vf_v8f16(ptr %x, half %y) { -; CHECK-LABEL: fadd_vf_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fadd_vf_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fadd_vf_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer @@ -1637,15 +3232,109 @@ } define void @fadd_vf_v6f16(ptr %x, half %y) { -; CHECK-LABEL: fadd_vf_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fadd_vf_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fadd_vf_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, 
mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fadd_vf_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fadd_vf_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fadd_vf_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv 
v8, v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -1655,13 +3344,21 @@ } define void @fadd_vf_v4f32(ptr %x, float %y) { -; CHECK-LABEL: fadd_vf_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fadd_vf_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fadd_vf_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer @@ -1687,13 +3384,32 @@ } define void @fadd_fv_v8f16(ptr %x, half %y) { -; CHECK-LABEL: fadd_fv_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fadd_fv_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fadd_fv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer @@ -1703,15 +3419,109 @@ } define void @fadd_fv_v6f16(ptr %x, half %y) { -; CHECK-LABEL: fadd_fv_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fadd_fv_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; 
ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fadd_fv_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fadd_fv_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fadd_fv_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma 
+; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fadd_fv_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -1721,13 +3531,21 @@ } define void @fadd_fv_v4f32(ptr %x, float %y) { -; CHECK-LABEL: fadd_fv_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fadd_fv_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fadd_fv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer @@ -1753,13 +3571,32 @@ } define void @fsub_vf_v8f16(ptr %x, half %y) { -; CHECK-LABEL: fsub_vf_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fsub_vf_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fsub_vf_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr 
%x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer @@ -1769,15 +3606,109 @@ } define void @fsub_vf_v6f16(ptr %x, half %y) { -; CHECK-LABEL: fsub_vf_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fsub_vf_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fsub_vf_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fsub_vf_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fsub_vf_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; 
ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fsub_vf_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -1787,13 +3718,21 @@ } define void @fsub_vf_v4f32(ptr %x, float %y) { -; CHECK-LABEL: fsub_vf_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fsub_vf_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fsub_vf_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer @@ -1819,13 +3758,32 @@ } define void @fsub_fv_v8f16(ptr %x, half %y) { -; CHECK-LABEL: fsub_fv_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fsub_fv_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v 
v8, (a0) +; ZVFH-NEXT: vfrsub.vf v8, v8, fa0 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fsub_fv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer @@ -1835,15 +3793,109 @@ } define void @fsub_fv_v6f16(ptr %x, half %y) { -; CHECK-LABEL: fsub_fv_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fsub_fv_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfrsub.vf v8, v8, fa0 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fsub_fv_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fsub_fv_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; 
ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fsub_fv_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fsub_fv_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -1853,13 +3905,21 @@ } define void @fsub_fv_v4f32(ptr %x, float %y) { -; CHECK-LABEL: fsub_fv_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fsub_fv_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfrsub.vf v8, v8, fa0 +; ZVFH-NEXT: vse32.v v8, 
(a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fsub_fv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfrsub.vf v8, v8, fa0 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer @@ -1885,13 +3945,32 @@ } define void @fmul_vf_v8f16(ptr %x, half %y) { -; CHECK-LABEL: fmul_vf_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmul_vf_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fmul_vf_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer @@ -1901,15 +3980,109 @@ } define void @fmul_vf_v6f16(ptr %x, half %y) { -; CHECK-LABEL: fmul_vf_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmul_vf_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fmul_vf_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; 
ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fmul_vf_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fmul_vf_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fmul_vf_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v 
v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -1919,13 +4092,21 @@ } define void @fmul_vf_v4f32(ptr %x, float %y) { -; CHECK-LABEL: fmul_vf_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmul_vf_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fmul_vf_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer @@ -1951,13 +4132,32 @@ } define void @fmul_fv_v8f16(ptr %x, half %y) { -; CHECK-LABEL: fmul_fv_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmul_fv_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fmul_fv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer @@ -1967,15 +4167,109 @@ } define void @fmul_fv_v6f16(ptr %x, half %y) { -; CHECK-LABEL: fmul_fv_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmul_fv_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fmul_fv_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: 
fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fmul_fv_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fmul_fv_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fmul_fv_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; 
ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -1985,13 +4279,21 @@ } define void @fmul_fv_v4f32(ptr %x, float %y) { -; CHECK-LABEL: fmul_fv_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmul_fv_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fmul_fv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer @@ -2017,13 +4319,32 @@ } define void @fdiv_vf_v8f16(ptr %x, half %y) { -; CHECK-LABEL: fdiv_vf_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fdiv_vf_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fdiv_vf_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer @@ -2033,15 +4354,109 @@ } define void @fdiv_vf_v6f16(ptr %x, half %y) { -; CHECK-LABEL: 
fdiv_vf_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fdiv_vf_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_vf_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_vf_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_vf_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; 
ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_vf_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2051,13 +4466,21 @@ } define void @fdiv_vf_v4f32(ptr %x, float %y) { -; CHECK-LABEL: fdiv_vf_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fdiv_vf_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fdiv_vf_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = insertelement <4 x float> poison, float %y, i32 0 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer @@ -2083,13 +4506,32 @@ } define void @fdiv_fv_v8f16(ptr %x, half %y) { -; CHECK-LABEL: fdiv_fv_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fdiv_fv_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0 +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fdiv_fv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v 
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <8 x half>, ptr %x
 %b = insertelement <8 x half> poison, half %y, i32 0
 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
@@ -2099,15 +4541,109 @@ }
 define void @fdiv_fv_v6f16(ptr %x, half %y) {
-; CHECK-LABEL: fdiv_fv_v6f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fdiv_fv_v6f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vse16.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: fdiv_fv_v6f16:
+; ZVFHMINLMULMAX2-RV32: # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v9, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfdiv.vv v8, v8, v9
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: fdiv_fv_v6f16:
+; ZVFHMINLMULMAX2-RV64: # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v9, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfdiv.vv v8, v8, v9
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: fdiv_fv_v6f16:
+; ZVFHMINLMULMAX1-RV32: # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v9, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v9
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: fdiv_fv_v6f16:
+; ZVFHMINLMULMAX1-RV64: # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v9, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v9
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: ret
 %a = load <6 x half>, ptr %x
 %b = insertelement <6 x half> poison, half %y, i32 0
 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2117,13 +4653,21 @@ }
 define void @fdiv_fv_v4f32(ptr %x, float %y) {
-; CHECK-LABEL: fdiv_fv_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fdiv_fv_v4f32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVFH-NEXT: vle32.v v8, (a0)
+; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
+; ZVFH-NEXT: vse32.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fdiv_fv_v4f32:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vle32.v v8, (a0)
+; ZVFHMIN-NEXT: vfrdiv.vf v8, v8, fa0
+; ZVFHMIN-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <4 x float>, ptr %x
 %b = insertelement <4 x float> poison, float %y, i32 0
 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
@@ -2149,14 +4693,35 @@ }
 define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
-; CHECK-LABEL: fma_vf_v8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v9, (a1)
-; CHECK-NEXT: vfmacc.vf v9, fa0, v8
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fma_vf_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v9, (a1)
+; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
+; ZVFH-NEXT: vse16.v v9, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fma_vf_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <8 x half>, ptr %x
 %b = load <8 x half>, ptr %y
 %c = insertelement <8 x half> poison, half %z, i32 0
@@ -2167,16 +4732,118 @@ }
 define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
-; CHECK-LABEL: fma_vf_v6f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v9, (a1)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfmacc.vf v9, fa0, v8
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fma_vf_v6f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v9, (a1)
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vse16.v v9, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: fma_vf_v6f16:
+; ZVFHMINLMULMAX2-RV32: # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: fma_vf_v6f16:
+; ZVFHMINLMULMAX2-RV64: # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: fma_vf_v6f16:
+; ZVFHMINLMULMAX1-RV32: # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: fma_vf_v6f16:
+; ZVFHMINLMULMAX1-RV64: # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: ret
 %a = load <6 x half>, ptr %x
 %b = load <6 x half>, ptr %y
 %c = insertelement <6 x half> poison, half %z, i32 0
@@ -2187,14 +4854,23 @@ }
 define void @fma_vf_v4f32(ptr %x, ptr %y, float %z) {
-; CHECK-LABEL: fma_vf_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vle32.v v9, (a1)
-; CHECK-NEXT: vfmacc.vf v9, fa0, v8
-; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fma_vf_v4f32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVFH-NEXT: vle32.v v8, (a0)
+; ZVFH-NEXT: vle32.v v9, (a1)
+; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
+; ZVFH-NEXT: vse32.v v9, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fma_vf_v4f32:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vle32.v v8, (a0)
+; ZVFHMIN-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-NEXT: vfmacc.vf v9, fa0, v8
+; ZVFHMIN-NEXT: vse32.v v9, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <4 x float>, ptr %x
 %b = load <4 x float>, ptr %y
 %c = insertelement <4 x float> poison, float %z, i32 0
@@ -2223,14 +4899,35 @@ }
 define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
-; CHECK-LABEL: fma_fv_v8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v9, (a1)
-; CHECK-NEXT: vfmacc.vf v9, fa0, v8
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fma_fv_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v9, (a1)
+; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
+; ZVFH-NEXT: vse16.v v9, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fma_fv_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <8 x half>, ptr %x
 %b = load <8 x half>, ptr %y
 %c = insertelement <8 x half> poison, half %z, i32 0
@@ -2241,16 +4938,118 @@ }
 define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
-; CHECK-LABEL: fma_fv_v6f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v9, (a1)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfmacc.vf v9, fa0, v8
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fma_fv_v6f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v9, (a1)
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vse16.v v9, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: fma_fv_v6f16:
+; ZVFHMINLMULMAX2-RV32: # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1)
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: fma_fv_v6f16:
+; ZVFHMINLMULMAX2-RV64: # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: fma_fv_v6f16:
+; ZVFHMINLMULMAX1-RV32: # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1)
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: fma_fv_v6f16:
+; ZVFHMINLMULMAX1-RV64: # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfmadd.vv v8, v9, v10
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: ret
 %a = load <6 x half>, ptr %x
 %b = load <6 x half>, ptr %y
 %c = insertelement <6 x half> poison, half %z, i32 0
@@ -2261,14 +5060,23 @@ }
 define void @fma_fv_v4f32(ptr %x, ptr %y, float %z) {
-; CHECK-LABEL: fma_fv_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vle32.v v9, (a1)
-; CHECK-NEXT: vfmacc.vf v9, fa0, v8
-; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fma_fv_v4f32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVFH-NEXT: vle32.v v8, (a0)
+; ZVFH-NEXT: vle32.v v9, (a1)
+; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
+; ZVFH-NEXT: vse32.v v9, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fma_fv_v4f32:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vle32.v v8, (a0)
+; ZVFHMIN-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-NEXT: vfmacc.vf v9, fa0, v8
+; ZVFHMIN-NEXT: vse32.v v9, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <4 x float>, ptr %x
 %b = load <4 x float>, ptr %y
 %c = insertelement <4 x float> poison, float %z, i32 0
@@ -2297,14 +5105,40 @@ }
 define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
-; CHECK-LABEL: fmsub_vf_v8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v9, (a1)
-; CHECK-NEXT: vfmsac.vf v9, fa0, v8
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fmsub_vf_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v9, (a1)
+; ZVFH-NEXT: vfmsac.vf v9, fa0, v8
+; ZVFH-NEXT: vse16.v v9, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fmsub_vf_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <8 x half>, ptr %x
 %b = load <8 x half>, ptr %y
 %c = insertelement <8 x half> poison, half %z, i32 0
@@ -2316,16 +5150,138 @@ }
 define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
-; CHECK-LABEL: fmsub_vf_v6f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v9, (a1)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfmsac.vf v9, fa0, v8
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fmsub_vf_v6f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vle16.v v9, (a1)
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfmsac.vf v9, fa0, v8
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vse16.v v9, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_vf_v6f16:
+; ZVFHMINLMULMAX2-RV32: # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX2-RV32-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfneg.v v9, v10
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2
+; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1)
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_vf_v6f16:
+; ZVFHMINLMULMAX2-RV64: # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX2-RV64-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfneg.v v9, v10
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_vf_v6f16:
+; ZVFHMINLMULMAX1-RV32: # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX1-RV32-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfneg.v v9, v10
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
+; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1)
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_vf_v6f16:
+; ZVFHMINLMULMAX1-RV64: # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a1)
+; ZVFHMINLMULMAX1-RV64-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfmv.v.f v10, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfneg.v v9, v10
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfmacc.vv v11, v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: ret
 %a = load <6 x half>, ptr %x
 %b = load <6 x half>, ptr %y
 %c = insertelement <6 x half> poison, half %z, i32 0
@@ -2337,14 +5293,23 @@ }
 define void @fnmsub_vf_v4f32(ptr %x, ptr %y, float %z) {
-; CHECK-LABEL: fnmsub_vf_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vle32.v v9, (a1)
-; CHECK-NEXT: vfnmsac.vf v9, fa0, v8
-; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fnmsub_vf_v4f32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVFH-NEXT: vle32.v v8, (a0)
+; ZVFH-NEXT: vle32.v v9, (a1)
+; ZVFH-NEXT: vfnmsac.vf v9, fa0, v8
+; ZVFH-NEXT: vse32.v v9, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fnmsub_vf_v4f32:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vle32.v v8, (a0)
+; ZVFHMIN-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-NEXT: vfnmsac.vf v9, fa0, v8
+; ZVFHMIN-NEXT: vse32.v v9, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <4 x float>, ptr %x
 %b = load <4 x float>, ptr %y
 %c = insertelement <4 x float> poison, float %z, i32 0
@@ -2376,14 +5341,23 @@ }
 define void @fnmsub_fv_v4f32(ptr %x, ptr %y, float %z) {
-; CHECK-LABEL: fnmsub_fv_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vle32.v v9, (a1)
-; CHECK-NEXT: vfnmsac.vf v9, fa0, v8
-; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: fnmsub_fv_v4f32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVFH-NEXT: vle32.v v8, (a0)
+; ZVFH-NEXT: vle32.v v9, (a1)
+; ZVFH-NEXT: vfnmsac.vf v9, fa0, v8
+; ZVFH-NEXT: vse32.v v9, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: fnmsub_fv_v4f32:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vle32.v v8, (a0)
+; ZVFHMIN-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-NEXT: vfnmsac.vf v9, fa0, v8
+; ZVFHMIN-NEXT: vse32.v v9, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <4 x float>, ptr %x
 %b = load <4 x float>, ptr %y
 %c = insertelement <4 x float> poison, float %z, i32 0
@@ -2415,20 +5389,20 @@ }
 define void @trunc_v8f16(ptr %x) {
-; CHECK-LABEL: trunc_v8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a1, %hi(.LCPI115_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI115_0)(a1)
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: trunc_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: lui a1, %hi(.LCPI115_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI115_0)(a1)
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: vse16.v v8, (a0)
+; ZVFH-NEXT: ret
 %a = load <8 x half>, ptr %x
 %b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
 store <8 x half> %b, ptr %x
@@ -2437,22 +5411,22 @@ declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
 define void @trunc_v6f16(ptr %x) {
-; CHECK-LABEL: trunc_v6f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a1, %hi(.LCPI116_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI116_0)(a1)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: trunc_v6f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: lui a1, %hi(.LCPI116_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI116_0)(a1)
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vse16.v v8, (a0)
+; ZVFH-NEXT: ret
 %a = load <6 x half>, ptr %x
 %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
 store <6 x half> %b, ptr %x
@@ -2461,20 +5435,35 @@ declare <6 x half> @llvm.trunc.v6f16(<6 x half>)
 define void @trunc_v4f32(ptr %x) {
-; CHECK-LABEL: trunc_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: trunc_v4f32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVFH-NEXT: vle32.v v8, (a0)
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: lui a1, 307200
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: vse32.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: trunc_v4f32:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vle32.v v8, (a0)
+; ZVFHMIN-NEXT: vfabs.v v9, v8
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <4 x float>, ptr %x
 %b = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
 store <4 x float> %b, ptr %x
@@ -2505,22 +5494,43 @@ declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
 define void @ceil_v8f16(ptr %x) {
-; CHECK-LABEL: ceil_v8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a1, %hi(.LCPI119_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI119_0)(a1)
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: fsrmi a1, 3
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: ceil_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: lui a1, %hi(.LCPI119_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI119_0)(a1)
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a1, 3
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a1
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: vse16.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a1, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a1
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <8 x half>, ptr %x
 %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
 store <8 x half> %b, ptr %x
@@ -2529,24 +5539,126 @@ declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
 define void @ceil_v6f16(ptr %x) {
-; CHECK-LABEL: ceil_v6f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a1, %hi(.LCPI120_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI120_0)(a1)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: fsrmi a1, 3
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: ceil_v6f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: lui a1, %hi(.LCPI120_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI120_0)(a1)
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a1, 3
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a1
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vse16.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: ceil_v6f16:
+; ZVFHMINLMULMAX2-RV32: # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9
+; ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200
+; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1
+; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 3
+; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1
+; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2
+; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1)
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: ceil_v6f16:
+; ZVFHMINLMULMAX2-RV64: # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9
+; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200
+; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x fa5, a1
+; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 3
+; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1
+; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: ceil_v6f16:
+; ZVFHMINLMULMAX1-RV32: # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9
+; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200
+; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1
+; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 3
+; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1
+; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
+; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1)
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: ceil_v6f16:
+; ZVFHMINLMULMAX1-RV64: # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9
+; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200
+; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1
+; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 3
+; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1
+; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: ret
 %a = load <6 x half>, ptr %x
 %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
 store <6 x half> %b, ptr %x
@@ -2555,22 +5667,39 @@ declare <6 x half> @llvm.ceil.v6f16(<6 x half>)
 define void @ceil_v4f32(ptr %x) {
-; CHECK-LABEL: ceil_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: fsrmi a1, 3
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: ceil_v4f32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVFH-NEXT: vle32.v v8, (a0)
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: lui a1, 307200
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a1, 3
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a1
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: vse32.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_v4f32:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vle32.v v8, (a0)
+; ZVFHMIN-NEXT: vfabs.v v9, v8
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
+; ZVFHMIN-NEXT: fsrmi a1, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-NEXT: fsrm a1
+; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <4 x float>, ptr %x
 %b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
 store <4 x float> %b, ptr %x
@@ -2603,22 +5732,43 @@ declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
 define void @floor_v8f16(ptr %x) {
-; CHECK-LABEL: floor_v8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a1, %hi(.LCPI123_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI123_0)(a1)
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: fsrmi a1, 2
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: floor_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: lui a1, %hi(.LCPI123_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI123_0)(a1)
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a1, 2
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a1
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: vse16.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: floor_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a1, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a1
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <8 x half>, ptr %x
 %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
 store <8 x half> %b, ptr %x
@@ -2627,24 +5777,126 @@ declare <8 x half> @llvm.floor.v8f16(<8 x half>)
 define void @floor_v6f16(ptr %x) {
-; CHECK-LABEL: floor_v6f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a1, %hi(.LCPI124_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI124_0)(a1)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: fsrmi a1, 2
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: floor_v6f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: lui a1, %hi(.LCPI124_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI124_0)(a1)
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a1, 2
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a1
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFH-NEXT: vse16.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV32-LABEL: floor_v6f16:
+; ZVFHMINLMULMAX2-RV32: # %bb.0:
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9
+; ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200
+; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1
+; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 2
+; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1
+; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2
+; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1)
+; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX2-RV64-LABEL: floor_v6f16:
+; ZVFHMINLMULMAX2-RV64: # %bb.0:
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9
+; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200
+; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x fa5, a1
+; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 2
+; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1
+; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX2-RV64-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV32-LABEL: floor_v6f16:
+; ZVFHMINLMULMAX1-RV32: # %bb.0:
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9
+; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200
+; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1
+; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 2
+; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1
+; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
+; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8
+; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1)
+; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV32-NEXT: ret
+;
+; ZVFHMINLMULMAX1-RV64-LABEL: floor_v6f16:
+; ZVFHMINLMULMAX1-RV64: # %bb.0:
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9
+; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200
+; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1
+; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 2
+; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1
+; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
+; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8
+; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMINLMULMAX1-RV64-NEXT: ret
 %a = load <6 x half>, ptr %x
 %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
 store <6 x half> %b, ptr %x
@@ -2653,22 +5905,39 @@ declare <6 x half> @llvm.floor.v6f16(<6 x half>)
 define void @floor_v4f32(ptr %x) {
-; CHECK-LABEL: floor_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: fsrmi a1, 2
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: fsrm a1
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: ret
+; ZVFH-LABEL: floor_v4f32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVFH-NEXT: vle32.v v8, (a0)
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: lui a1, 307200
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a1, 2
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a1
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: vse32.v v8, (a0)
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: floor_v4f32:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vle32.v v8, (a0)
+; ZVFHMIN-NEXT: vfabs.v v9, v8
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5
+; ZVFHMIN-NEXT: fsrmi a1, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFHMIN-NEXT: fsrm a1
+; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFHMIN-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-NEXT: ret
 %a = load <4 x float>, ptr %x
 %b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
 store <4 x float> %b, ptr %x
@@ -2701,22 +5970,43 @@ declare <2 x double> @llvm.floor.v2f64(<2 x double>)
 define void @round_v8f16(ptr %x) {
-; CHECK-LABEL: round_v8f16:
round_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI127_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI127_0)(a1) -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: round_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: lui a1, %hi(.LCPI127_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI127_0)(a1) +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a1, 4 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a1 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: round_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a1, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.round.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x @@ -2725,24 +6015,126 @@ declare <8 x half> @llvm.round.v8f16(<8 x half>) define void @round_v6f16(ptr %x) { -; CHECK-LABEL: round_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI128_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI128_0)(a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: round_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: lui a1, %hi(.LCPI128_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI128_0)(a1) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a1, 4 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a1 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: round_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: 
vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfabs.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: lui a1, 307200 +; ZVFHMINLMULMAX2-RV32-NEXT: fmv.w.x fa5, a1 +; ZVFHMINLMULMAX2-RV32-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMINLMULMAX2-RV32-NEXT: fsrmi a1, 4 +; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMINLMULMAX2-RV32-NEXT: fsrm a1 +; ZVFHMINLMULMAX2-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMINLMULMAX2-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v8, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: round_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfabs.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: lui a1, 307200 +; ZVFHMINLMULMAX2-RV64-NEXT: fmv.w.x fa5, a1 +; ZVFHMINLMULMAX2-RV64-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMINLMULMAX2-RV64-NEXT: fsrmi a1, 4 +; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMINLMULMAX2-RV64-NEXT: fsrm a1 +; ZVFHMINLMULMAX2-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMINLMULMAX2-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: round_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfabs.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: lui a1, 307200 +; ZVFHMINLMULMAX1-RV32-NEXT: fmv.w.x fa5, a1 +; ZVFHMINLMULMAX1-RV32-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMINLMULMAX1-RV32-NEXT: fsrmi a1, 4 +; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMINLMULMAX1-RV32-NEXT: fsrm a1 +; ZVFHMINLMULMAX1-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMINLMULMAX1-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v9, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v8, (a0) +; 
ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: round_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfabs.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: lui a1, 307200 +; ZVFHMINLMULMAX1-RV64-NEXT: fmv.w.x fa5, a1 +; ZVFHMINLMULMAX1-RV64-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMINLMULMAX1-RV64-NEXT: fsrmi a1, 4 +; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMINLMULMAX1-RV64-NEXT: fsrm a1 +; ZVFHMINLMULMAX1-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMINLMULMAX1-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -2751,22 +6143,39 @@ declare <6 x half> @llvm.round.v6f16(<6 x half>) define void @round_v4f32(ptr %x) { -; CHECK-LABEL: round_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: round_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: lui a1, 307200 +; ZVFH-NEXT: fmv.w.x fa5, a1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a1, 4 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a1 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: round_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a1, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = call <4 x float> @llvm.round.v4f32(<4 x float> %a) store <4 x float> %b, ptr %x @@ -2799,20 +6208,39 @@ declare <2 x double> @llvm.round.v2f64(<2 x double>) define void @rint_v8f16(ptr %x) { -; CHECK-LABEL: rint_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI131_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI131_0)(a1) -; CHECK-NEXT: vfabs.v v9, v8 -; 
CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: rint_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: lui a1, %hi(.LCPI131_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI131_0)(a1) +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: rint_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.rint.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x @@ -2821,20 +6249,35 @@ declare <8 x half> @llvm.rint.v8f16(<8 x half>) define void @rint_v4f32(ptr %x) { -; CHECK-LABEL: rint_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: rint_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: lui a1, 307200 +; ZVFH-NEXT: fmv.w.x fa5, a1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: rint_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = call <4 x float> @llvm.rint.v4f32(<4 x float> %a) store <4 x float> %b, ptr %x @@ -2865,22 +6308,43 @@ declare <2 x double> @llvm.rint.v2f64(<2 x double>) define void @nearbyint_v8f16(ptr %x) { -; CHECK-LABEL: nearbyint_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a1, %hi(.LCPI134_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI134_0)(a1) 
-; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a1 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a1 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: nearbyint_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: lui a1, %hi(.LCPI134_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI134_0)(a1) +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: frflags a1 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a1 +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: vse16.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nearbyint_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x @@ -2889,22 +6353,39 @@ declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>) define void @nearbyint_v4f32(ptr %x) { -; CHECK-LABEL: nearbyint_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a1 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a1 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: nearbyint_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: lui a1, 307200 +; ZVFH-NEXT: fmv.w.x fa5, a1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: frflags a1 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a1 +; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: vse32.v v8, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nearbyint_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: frflags a1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: fsflags a1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: vse32.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = call <4 x float> 
@llvm.nearbyint.v4f32(<4 x float> %a) store <4 x float> %b, ptr %x @@ -2937,15 +6418,36 @@ declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fmuladd_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vfmacc.vv v10, v8, v9 -; CHECK-NEXT: vse16.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmuladd_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vle16.v v10, (a2) +; ZVFH-NEXT: vfmacc.vv v10, v8, v9 +; ZVFH-NEXT: vse16.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fmuladd_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a2) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = load <8 x half>, ptr %z @@ -2956,17 +6458,119 @@ declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>) define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fmuladd_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfmacc.vv v10, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmuladd_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vle16.v v10, (a2) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfmacc.vv v10, v8, v9 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fmuladd_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a2) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, 
ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fmuladd_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a2) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fmuladd_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, (a2) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fmuladd_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a2) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; 
ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -2977,15 +6581,25 @@ declare <6 x half> @llvm.fmuladd.v6f16(<6 x half>, <6 x half>, <6 x half>) define void @fmuladd_v4f32(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fmuladd_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vle32.v v10, (a2) -; CHECK-NEXT: vfmacc.vv v10, v8, v9 -; CHECK-NEXT: vse32.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmuladd_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vle32.v v10, (a2) +; ZVFH-NEXT: vfmacc.vv v10, v8, v9 +; ZVFH-NEXT: vse32.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fmuladd_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vle32.v v10, (a2) +; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9 +; ZVFHMIN-NEXT: vse32.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = load <4 x float>, ptr %z @@ -3015,15 +6629,36 @@ declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fmsub_fmuladd_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vfmsac.vv v10, v8, v9 -; CHECK-NEXT: vse16.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmsub_fmuladd_v8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vle16.v v10, (a2) +; ZVFH-NEXT: vfmsac.vv v10, v8, v9 +; ZVFH-NEXT: vse16.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fmsub_fmuladd_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a2) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = load <8 x half>, ptr %z @@ -3034,17 +6669,119 @@ } define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fmsub_fmuladd_v6f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 
6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfmsac.vv v10, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fmsub_fmuladd_v6f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) +; ZVFH-NEXT: vle16.v v10, (a2) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vfmsac.vv v10, v8, v9 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFH-NEXT: vse16.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMINLMULMAX2-RV32-LABEL: fmsub_fmuladd_v6f16: +; ZVFHMINLMULMAX2-RV32: # %bb.0: +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: vle16.v v10, (a2) +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX2-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX2-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX2-RV32-NEXT: ret +; +; ZVFHMINLMULMAX2-RV64-LABEL: fmsub_fmuladd_v6f16: +; ZVFHMINLMULMAX2-RV64: # %bb.0: +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vle16.v v10, (a2) +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX2-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX2-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX2-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX2-RV64-NEXT: ret +; +; ZVFHMINLMULMAX1-RV32-LABEL: fmsub_fmuladd_v6f16: +; ZVFHMINLMULMAX1-RV32: # %bb.0: +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: vle16.v v10, 
(a2) +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV32-NEXT: addi a1, a0, 8 +; ZVFHMINLMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; ZVFHMINLMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMINLMULMAX1-RV32-NEXT: vse16.v v9, (a0) +; ZVFHMINLMULMAX1-RV32-NEXT: ret +; +; ZVFHMINLMULMAX1-RV64-LABEL: fmsub_fmuladd_v6f16: +; ZVFHMINLMULMAX1-RV64: # %bb.0: +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v8, (a1) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vle16.v v10, (a2) +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v11 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMINLMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMINLMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 +; ZVFHMINLMULMAX1-RV64-NEXT: addi a0, a0, 8 +; ZVFHMINLMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; ZVFHMINLMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -3055,15 +6792,25 @@ } define void @fnmsub_fmuladd_v4f32(ptr %x, ptr %y, ptr %z) { -; CHECK-LABEL: fnmsub_fmuladd_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vle32.v v10, (a2) -; CHECK-NEXT: vfnmsac.vv v10, v8, v9 -; CHECK-NEXT: vse32.v v10, (a0) -; CHECK-NEXT: ret +; ZVFH-LABEL: fnmsub_fmuladd_v4f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFH-NEXT: vle32.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vle32.v v10, (a2) +; ZVFH-NEXT: vfnmsac.vv v10, v8, v9 +; ZVFH-NEXT: vse32.v v10, (a0) +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: fnmsub_fmuladd_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vle32.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vle32.v v10, (a2) +; ZVFHMIN-NEXT: vfnmsac.vv v10, v8, v9 +; ZVFHMIN-NEXT: vse32.v v10, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x float>, ptr %x %b = load <4 x float>, ptr %y %c = load <4 x float>, ptr %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN ; This file tests the code generation for `llvm.round.*` on fixed vector type. @@ -21,6 +25,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %a = call <1 x half> @llvm.round.v1f16(<1 x half> %x) ret <1 x half> %a } @@ -41,6 +64,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %a = call <2 x half> @llvm.round.v2f16(<2 x half> %x) ret <2 x half> %a } @@ -61,6 +103,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: 
fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %a = call <4 x half> @llvm.round.v4f16(<4 x half> %x) ret <4 x half> %a } @@ -81,6 +142,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %a = call <8 x half> @llvm.round.v8f16(<8 x half> %x) ret <8 x half> %a } @@ -101,6 +181,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %a = call <16 x half> @llvm.round.v16f16(<16 x half> %x) ret <16 x half> %a } @@ -122,6 +221,26 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: li a0, 32 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %a = call <32 x half> @llvm.round.v32f16(<32 x half> %x) ret <32 x half> %a } @@ -142,6 +261,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v1f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; 
ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <1 x float> @llvm.round.v1f32(<1 x float> %x) ret <1 x float> %a } @@ -162,6 +296,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <2 x float> @llvm.round.v2f32(<2 x float> %x) ret <2 x float> %a } @@ -182,6 +331,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <4 x float> @llvm.round.v4f32(<4 x float> %x) ret <4 x float> %a } @@ -202,6 +366,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <8 x float> @llvm.round.v8f32(<8 x float> %x) ret <8 x float> %a } @@ -222,6 +401,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <16 x float> @llvm.round.v16f32(<16 x float> %x) ret <16 x float> %a } @@ -242,6 +436,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v1f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, 
v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <1 x double> @llvm.round.v1f64(<1 x double> %x) ret <1 x double> %a } @@ -262,6 +471,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, %hi(.LCPI12_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <2 x double> @llvm.round.v2f64(<2 x double> %x) ret <2 x double> %a } @@ -282,6 +506,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, %hi(.LCPI13_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <4 x double> @llvm.round.v4f64(<4 x double> %x) ret <4 x double> %a } @@ -302,6 +541,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: round_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, %hi(.LCPI14_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <8 x double> @llvm.round.v8f64(<8 x double> %x) ret <8 x double> %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s 
| FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN ; This file tests the code generation for `llvm.roundeven.*` on fixed vector type. @@ -21,6 +25,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %a = call <1 x half> @llvm.roundeven.v1f16(<1 x half> %x) ret <1 x half> %a } @@ -41,6 +64,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %a = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x) ret <2 x half> %a } @@ -61,6 +103,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %a = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x) ret <4 x half> %a } @@ -81,6 +142,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; 
ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %a = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %x) ret <8 x half> %a } @@ -101,6 +181,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %a = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %x) ret <16 x half> %a } @@ -122,6 +221,26 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: li a0, 32 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %a = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %x) ret <32 x half> %a } @@ -142,6 +261,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v1f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <1 x float> @llvm.roundeven.v1f32(<1 x float> %x) ret <1 x float> %a } @@ -162,6 +296,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <2 x float> @llvm.roundeven.v2f32(<2 
x float> %x) ret <2 x float> %a } @@ -182,6 +331,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x) ret <4 x float> %a } @@ -202,6 +366,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %x) ret <8 x float> %a } @@ -222,6 +401,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %x) ret <16 x float> %a } @@ -242,6 +436,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v1f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %a = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %x) ret <1 x double> %a } @@ -262,6 +471,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: roundeven_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, %hi(.LCPI12_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t 
+; ZVFHMIN-NEXT: ret
%a = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x)
ret <2 x double> %a
}
@@ -282,6 +506,21 @@
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: roundeven_v4f64:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, %hi(.LCPI13_0)
+; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
+; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v10, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: ret
%a = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %x)
ret <4 x double> %a
}
@@ -302,6 +541,21 @@
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: roundeven_v8f64:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, %hi(.LCPI14_0)
+; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v12, v8
+; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; ZVFHMIN-NEXT: ret
%a = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %x)
ret <8 x double> %a
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
declare half @llvm.vp.reduce.fadd.v2f16(half, <2 x half>, <2 x i1>, i32)
@@ -15,6 +19,19 @@
; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fadd_v2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfredusum.vs v8, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
%r = call reassoc half @llvm.vp.reduce.fadd.v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 %evl)
ret half %r
}
@@ -28,6 +45,19 @@
;
CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_ord_fadd_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfredosum.vs v8, v9, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret %r = call half @llvm.vp.reduce.fadd.v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 %evl) ret half %r } @@ -43,6 +73,19 @@ ; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fadd_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfredusum.vs v8, v9, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret %r = call reassoc half @llvm.vp.reduce.fadd.v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 %evl) ret half %r } @@ -56,6 +99,19 @@ ; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_ord_fadd_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfredosum.vs v8, v9, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret %r = call half @llvm.vp.reduce.fadd.v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 %evl) ret half %r } @@ -71,6 +127,15 @@ ; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fadd_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v9, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfredusum.vs v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa0, v9 +; ZVFHMIN-NEXT: ret %r = call reassoc float @llvm.vp.reduce.fadd.v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 %evl) ret float %r } @@ -84,6 +149,15 @@ ; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_ord_fadd_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v9, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfredosum.vs v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa0, v9 +; ZVFHMIN-NEXT: ret %r = call float @llvm.vp.reduce.fadd.v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 %evl) ret float %r } @@ -99,6 +173,15 @@ ; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fadd_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v9, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfredusum.vs v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa0, v9 +; ZVFHMIN-NEXT: ret %r = call reassoc float 
@llvm.vp.reduce.fadd.v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 %evl) ret float %r } @@ -112,6 +195,15 @@ ; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_ord_fadd_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v9, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfredosum.vs v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa0, v9 +; ZVFHMIN-NEXT: ret %r = call float @llvm.vp.reduce.fadd.v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 %evl) ret float %r } @@ -142,6 +234,30 @@ ; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fadd_v64f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; ZVFHMIN-NEXT: li a2, 32 +; ZVFHMIN-NEXT: vslidedown.vi v24, v0, 4 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB8_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 32 +; ZVFHMIN-NEXT: .LBB8_2: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v25, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfredusum.vs v25, v8, v25, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -32 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v24 +; ZVFHMIN-NEXT: vfredusum.vs v25, v16, v25, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa0, v25 +; ZVFHMIN-NEXT: ret %r = call reassoc float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl) ret float %r } @@ -170,6 +286,30 @@ ; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_ord_fadd_v64f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; ZVFHMIN-NEXT: li a2, 32 +; ZVFHMIN-NEXT: vslidedown.vi v24, v0, 4 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB9_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 32 +; ZVFHMIN-NEXT: .LBB9_2: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v25, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfredosum.vs v25, v8, v25, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -32 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v24 +; ZVFHMIN-NEXT: vfredosum.vs v25, v16, v25, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa0, v25 +; ZVFHMIN-NEXT: ret %r = call float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl) ret float %r } @@ -185,6 +325,15 @@ ; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fadd_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v9, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfredusum.vs v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa0, v9 +; ZVFHMIN-NEXT: ret %r = call reassoc double @llvm.vp.reduce.fadd.v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 %evl) ret double %r } @@ -198,6 +347,15 @@ ; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_ord_fadd_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v9, 
fa0
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; ZVFHMIN-NEXT: vfredosum.vs v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa0, v9
+; ZVFHMIN-NEXT: ret
%r = call double @llvm.vp.reduce.fadd.v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 %evl)
ret double %r
}
@@ -213,6 +371,15 @@
; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fadd_v3f64:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v10, fa0
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-NEXT: vfredusum.vs v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa0, v10
+; ZVFHMIN-NEXT: ret
%r = call reassoc double @llvm.vp.reduce.fadd.v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 %evl)
ret double %r
}
@@ -226,6 +393,15 @@
; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_ord_fadd_v3f64:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v10, fa0
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-NEXT: vfredosum.vs v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa0, v10
+; ZVFHMIN-NEXT: ret
%r = call double @llvm.vp.reduce.fadd.v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 %evl)
ret double %r
}
@@ -241,6 +417,15 @@
; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fadd_v4f64:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v10, fa0
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-NEXT: vfredusum.vs v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa0, v10
+; ZVFHMIN-NEXT: ret
%r = call reassoc double @llvm.vp.reduce.fadd.v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 %evl)
ret double %r
}
@@ -254,6 +439,15 @@
; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_ord_fadd_v4f64:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v10, fa0
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; ZVFHMIN-NEXT: vfredosum.vs v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa0, v10
+; ZVFHMIN-NEXT: ret
%r = call double @llvm.vp.reduce.fadd.v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 %evl)
ret double %r
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
declare <2 x half> @llvm.vp.round.v2f16(<2 x half>, <2 x i1>, i32)
@@ -23,6 +27,27 @@
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vp_round_v2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; ZVFHMIN-NEXT: fsrmi a0, 4
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%v = call <2 x half> @llvm.vp.round.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
@@ -42,6 +67,25 @@
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vp_round_v2f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 4
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.vp.round.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
@@ -67,6 +111,27 @@
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vp_round_v4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; ZVFHMIN-NEXT: fsrmi a0, 4
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%v = call <4 x half> @llvm.vp.round.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
@@ -86,6 +151,25 @@
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vp_round_v4f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+;
ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.round.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -111,6 +195,29 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.round.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -130,6 +237,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.round.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -157,6 +283,29 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; 
ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.round.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -176,6 +325,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.round.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -201,6 +369,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.round.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -220,6 +405,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.round.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -245,6 +445,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, 
v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.round.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -264,6 +481,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.round.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -291,6 +523,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.round.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -310,6 +561,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.round.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -337,6 +603,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: 
fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.round.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -356,6 +641,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.round.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -381,6 +681,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.round.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -400,6 +717,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.round.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -427,6 +759,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t 
+; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -446,6 +797,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -473,6 +839,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -492,6 +877,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -519,6 +919,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v15f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v 
v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } @@ -538,6 +957,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v15f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -565,6 +999,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI24_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -584,6 +1037,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI25_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -661,6 +1129,75 @@ ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v32f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; 
ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v25, v0 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: vslidedown.vi v1, v0, 2 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB26_2: +; ZVFHMIN-NEXT: lui a2, %hi(.LCPI26_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a1, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmflt.vf v1, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } @@ -699,6 +1236,40 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_v32f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB27_2: +; ZVFHMIN-NEXT: lui a2, %hi(.LCPI27_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a1, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: 
sltu a0, a0, a1
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: and a0, a0, a1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 4
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
%v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
declare <2 x half> @llvm.vp.roundeven.v2f16(<2 x half>, <2 x i1>, i32)
@@ -23,6 +27,27 @@
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vp_roundeven_v2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; ZVFHMIN-NEXT: fsrmi a0, 0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%v = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
@@ -42,6 +67,25 @@
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vp_roundeven_v2f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT:
vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) @@ -67,6 +111,27 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -86,6 +151,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -111,6 +195,29 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> 
@llvm.vp.roundeven.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -130,6 +237,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -157,6 +283,29 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -176,6 +325,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -201,6 +369,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; 
ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -220,6 +405,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -245,6 +445,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -264,6 +481,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -291,6 +523,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; 
ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -310,6 +561,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -337,6 +603,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -356,6 +641,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -381,6 +681,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; 
ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -400,6 +717,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -427,6 +759,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -446,6 +797,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -473,6 +839,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v 
v12, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -492,6 +877,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -519,6 +919,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v15f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } @@ -538,6 +957,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v15f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -565,6 +999,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, 
m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI24_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -584,6 +1037,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI25_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -661,6 +1129,75 @@ ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v32f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v25, v0 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: vslidedown.vi v1, v0, 2 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB26_2: +; ZVFHMIN-NEXT: lui a2, %hi(.LCPI26_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a1, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli 
zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmflt.vf v1, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } @@ -699,6 +1236,40 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_v32f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB27_2: +; ZVFHMIN-NEXT: lui a2, %hi(.LCPI27_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a1, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN declare <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half>, <2 x i1>, i32) @@ -23,6 +27,27 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -42,6 +67,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) @@ -67,6 +111,27 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> %va, <4 x i1> %m, i32 
%evl) ret <4 x half> %v } @@ -86,6 +151,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -111,6 +195,29 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -130,6 +237,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -157,6 +283,29 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; 
ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -176,6 +325,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -201,6 +369,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -220,6 +405,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> 
@llvm.vp.roundtozero.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -245,6 +445,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -264,6 +481,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -291,6 +523,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -310,6 +561,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> 
@llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -337,6 +603,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -356,6 +641,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -381,6 +681,23 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI16_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI16_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -400,6 +717,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI17_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI17_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v9, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 
x i32> zeroinitializer %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -427,6 +759,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -446,6 +797,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI19_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI19_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -473,6 +839,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -492,6 +877,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI21_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v12, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZVFHMIN-NEXT: 
vfsgnj.vv v8, v12, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -519,6 +919,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v15f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } @@ -538,6 +957,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v15f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI23_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -565,6 +999,25 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI24_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -584,6 +1037,21 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, %hi(.LCPI25_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI25_0)(a1) +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v16, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 
+; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -661,6 +1129,75 @@ ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_v32f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v25, v0 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: vslidedown.vi v1, v0, 2 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB26_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB26_2: +; ZVFHMIN-NEXT: lui a2, %hi(.LCPI26_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: vfabs.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a1, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmflt.vf v1, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } @@ -699,6 +1236,40 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: 
vp_roundtozero_v32f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB27_2: +; ZVFHMIN-NEXT: lui a2, %hi(.LCPI27_0) +; ZVFHMIN-NEXT: fld fa5, %lo(.LCPI27_0)(a2) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v8 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a1, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: fsrm a1 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN declare <2 x half> @llvm.vp.fabs.v2f16(<2 x half>, <2 x i1>, i32) @@ -12,6 +16,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -22,6 +36,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v 
v9, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) @@ -36,6 +60,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -46,6 +80,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -60,6 +104,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -70,6 +124,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -84,6 +148,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -94,6 +168,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: 
vfabs_vv_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -108,6 +192,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.fabs.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -118,6 +208,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fabs.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -132,6 +228,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.fabs.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -142,6 +244,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fabs.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -156,6 +264,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.fabs.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -166,6 +280,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fabs.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -180,6 +300,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> 
@llvm.vp.fabs.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -190,6 +316,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fabs.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -204,6 +336,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.fabs.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -214,6 +352,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fabs.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -228,6 +372,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.fabs.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -238,6 +388,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fabs.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -252,6 +408,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.fabs.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -262,6 +424,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fabs.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -276,6 +444,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v15f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, 
v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.fabs.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } @@ -286,6 +460,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v15f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer %v = call <15 x double> @llvm.vp.fabs.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -300,6 +480,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.fabs.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -310,6 +496,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.fabs.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -339,6 +531,27 @@ ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v32f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v24, v0 +; ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZVFHMIN-NEXT: vslidedown.vi v0, v0, 2 +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a2, a0, a1 +; ZVFHMIN-NEXT: addi a2, a2, -1 +; ZVFHMIN-NEXT: and a1, a2, a1 +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: vfabs.v v16, v16, v0.t +; ZVFHMIN-NEXT: bltu a0, a1, .LBB26_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a0, 16 +; ZVFHMIN-NEXT: .LBB26_2: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v24 +; ZVFHMIN-NEXT: vfabs.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.fabs.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } @@ -361,6 +574,24 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v32f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB27_2: +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v8 +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.fabs.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN declare <2 x half> @llvm.vp.fadd.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) @@ -12,6 +16,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -22,6 +37,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) @@ -34,6 +60,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) @@ -46,6 +88,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; 
CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -62,6 +120,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v3f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <3 x half> @llvm.vp.fadd.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) ret <3 x half> %v } @@ -74,6 +143,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -84,6 +164,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) @@ -96,6 +187,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: 
ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) @@ -108,6 +215,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -124,6 +247,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -134,6 +268,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) @@ -146,6 +291,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) @@ -158,6 +319,22 @@ ; 
CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -174,6 +351,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -184,6 +372,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) @@ -196,6 +395,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) @@ -208,6 +423,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, 
zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -224,6 +455,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -234,6 +471,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl) @@ -246,6 +489,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) @@ -258,6 +507,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v2f32_commute: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %vb, <2 x float> %va, <2 x i1> %m, i32 %evl) @@ -270,6 +525,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -284,6 +545,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v2f32_unmasked_commute: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 +; 
ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -300,6 +567,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -310,6 +583,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl) @@ -322,6 +601,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) @@ -334,6 +619,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -350,6 +641,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -360,6 +657,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl) @@ -372,6 +675,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = 
shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) @@ -384,6 +693,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -400,6 +715,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -410,6 +731,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl) @@ -422,6 +749,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) @@ -434,6 +767,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -450,6 +789,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -460,6 +805,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 
x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl) @@ -472,6 +823,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) @@ -484,6 +841,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -500,6 +863,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -510,6 +879,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl) @@ -522,6 +897,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) @@ -534,6 +915,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -550,6 +937,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> 
@llvm.vp.fadd.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -560,6 +953,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl) @@ -572,6 +971,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) @@ -584,6 +989,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -600,6 +1011,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v16, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -610,6 +1027,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl) @@ -622,6 +1045,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) @@ -634,6 +1063,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret 
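; NOTE (illustrative sketch, not an autogenerated check line): under
; +zvfhmin the f16 tests above are promoted to f32: operands are widened
; with vfwcvt.f.f.v, the arithmetic executes at e32, and the result is
; narrowed back with vfncvt.f.f.w. The f32/f64 operations are already
; legal, so their ZVFHMIN bodies match the CHECK bodies. At the IR level
; the f16 promotion behaves roughly like the following (value names are
; hypothetical; the intrinsic is the v2f32 one declared in this file):
;   %wa = fpext <2 x half> %va to <2 x float>
;   %wb = fpext <2 x half> %vb to <2 x float>
;   %ws = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %wa, <2 x float> %wb, <2 x i1> %m, i32 %evl)
;   %r  = fptrunc <2 x float> %ws to <2 x half>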
%elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN declare <2 x half> @llvm.vp.fdiv.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) @@ -12,6 +16,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -22,6 +37,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) @@ -34,6 +60,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = 
insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) @@ -46,6 +88,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -62,6 +120,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v3f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <3 x half> @llvm.vp.fdiv.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) ret <3 x half> %v } @@ -74,6 +143,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -84,6 +164,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) @@ -96,6 +187,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w 
v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) @@ -108,6 +215,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -124,6 +247,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -134,6 +268,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) @@ -146,6 +291,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, 
ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) @@ -158,6 +319,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -174,6 +351,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -184,6 +372,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) @@ -196,6 +395,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer %v = call <16 x half> 
@llvm.vp.fdiv.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) @@ -208,6 +423,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -224,6 +455,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -234,6 +471,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl) @@ -246,6 +489,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) @@ -258,6 +507,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -274,6 +529,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -284,6 +545,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9 ; 
CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl) @@ -296,6 +563,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) @@ -308,6 +581,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -324,6 +603,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -334,6 +619,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl) @@ -346,6 +637,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) @@ -358,6 +655,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -374,6 +677,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; 
CHECK-NEXT: vfdiv.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -384,6 +693,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl) @@ -396,6 +711,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) @@ -408,6 +729,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -424,6 +751,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -434,6 +767,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl) @@ -446,6 +785,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) @@ -458,6 +803,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; 
CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -474,6 +825,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -484,6 +841,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl) @@ -496,6 +859,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) @@ -508,6 +877,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -524,6 +899,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -534,6 +915,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl) @@ -546,6 +933,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; 
ZVFHMIN-LABEL: vfdiv_vf_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) @@ -558,6 +951,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -574,6 +973,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v16, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -584,6 +989,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl) @@ -596,6 +1007,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) @@ -608,6 +1025,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m 
-target-abi=lp64d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN

 declare <2 x half> @llvm.vp.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32)

@@ -13,6 +17,18 @@
 ; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfma_vv_v2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
 ret <2 x half> %v
 }
@@ -23,6 +39,18 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfma_vv_v2f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
 %head = insertelement <2 x i1> poison, i1 true, i32 0
 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
@@ -35,6 +63,23 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfma_vf_v2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <2 x half> poison, half %b, i32 0
 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl)
@@ -47,6 +92,23 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT: ret
+;
+;
ZVFHMIN-LABEL: vfma_vf_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -64,6 +126,18 @@ ; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -74,6 +148,18 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl) @@ -86,6 +172,23 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl) @@ -98,6 +201,23 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: 
fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -115,6 +235,18 @@ ; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -125,6 +257,18 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl) @@ -137,6 +281,23 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl) @@ -149,6 +310,23 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; 
ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -166,6 +344,18 @@ ; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -176,6 +366,18 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v10, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl) @@ -188,6 +390,23 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl) @@ -200,6 +419,23 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -217,6 +453,13 @@ ; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -227,6 +470,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v9, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl) @@ -239,6 +488,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v9, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl) @@ -251,6 +506,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -268,6 +529,13 @@ ; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v10, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -278,6 +546,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v9, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = 
shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl) @@ -290,6 +564,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v9, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl) @@ -302,6 +582,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -319,6 +605,13 @@ ; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v10, v8, v12, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -329,6 +622,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v10, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v10, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl) @@ -341,6 +640,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v10, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl) @@ -353,6 +658,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -370,6 +681,13 @@ ; CHECK-NEXT: vfmadd.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, 
ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -380,6 +698,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v12, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl) @@ -392,6 +716,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v12, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl) @@ -404,6 +734,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -421,6 +757,13 @@ ; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v10, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -431,6 +774,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v9, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl) @@ -443,6 +792,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v9, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl) @@ -455,6 +810,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; 
CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -472,6 +833,13 @@ ; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v10, v8, v12, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v10 +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -482,6 +850,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v10, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v10, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl) @@ -494,6 +868,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v10, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl) @@ -506,6 +886,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -523,6 +909,13 @@ ; CHECK-NEXT: vfmadd.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v12 +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -533,6 +926,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v12, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl) @@ 
-545,6 +944,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v12, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl) @@ -557,6 +962,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -576,6 +987,15 @@ ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v15f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vle64.v v24, (a0) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v16 +; ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl) ret <15 x double> %v } @@ -588,6 +1008,14 @@ ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v15f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vle64.v v24, (a0) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24 +; ZVFHMIN-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl) @@ -605,6 +1033,15 @@ ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vle64.v v24, (a0) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v16 +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -617,6 +1054,14 @@ ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vle64.v v24, (a0) +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl) @@ -629,6 +1074,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, 
m8, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v16, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl) @@ -641,6 +1092,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vf v8, fa0, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vf_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vf v8, fa0, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -747,6 +1204,102 @@ ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v32f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: li a3, 40 +; ZVFHMIN-NEXT: mul a1, a1, a3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v1, v0 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vle64.v v8, (a2) +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: li a3, 24 +; ZVFHMIN-NEXT: mul a1, a1, a3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: addi a1, a2, 128 +; ZVFHMIN-NEXT: addi a2, a0, 128 +; ZVFHMIN-NEXT: vle64.v v8, (a0) +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vle64.v v16, (a1) +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vle64.v v8, (a2) +; ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZVFHMIN-NEXT: addi a0, a4, -16 +; ZVFHMIN-NEXT: sltu a1, a4, a0 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: and a0, a1, a0 +; ZVFHMIN-NEXT: vslidedown.vi v0, v0, 2 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: li a0, 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; 
ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: bltu a4, a0, .LBB50_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a4, 16 +; ZVFHMIN-NEXT: .LBB50_2: +; ZVFHMIN-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 40 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl) ret <32 x double> %v } @@ -814,6 +1367,69 @@ ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfma_vv_v32f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: li a3, 24 +; ZVFHMIN-NEXT: mul a1, a1, a3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; ZVFHMIN-NEXT: addi a1, a2, 128 +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vle64.v v24, (a1) +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: addi a1, a0, 128 +; ZVFHMIN-NEXT: vle64.v v24, (a1) +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vle64.v v24, (a2) +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vle64.v v0, (a0) +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: mv a0, a4 +; ZVFHMIN-NEXT: bltu a4, a1, .LBB51_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a0, 16 +; ZVFHMIN-NEXT: .LBB51_2: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmadd.vv v0, v8, v24 +; ZVFHMIN-NEXT: addi a0, a4, -16 +; ZVFHMIN-NEXT: sltu a1, a4, a0 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: and a0, a1, a0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmadd.vv 
v24, v16, v8
+; ZVFHMIN-NEXT: vmv8r.v v8, v0
+; ZVFHMIN-NEXT: vmv.v.v v16, v24
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
 %head = insertelement <32 x i1> poison, i1 true, i32 0
 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
 %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
@@ -1,8 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN

 declare <2 x half> @llvm.vp.maxnum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)

@@ -12,6 +16,17 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v9, v0.t
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_v2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %v = call <2 x half> @llvm.vp.maxnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
 ret <2 x half> %v
 }
@@ -22,6 +37,17 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v9
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %head = insertelement <2 x i1> poison, i1 true, i32 0
 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
 %v = call <2 x half> @llvm.vp.maxnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
@@ -36,6 +62,17 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v9, v0.t
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_v4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4,
e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.maxnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -46,6 +83,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.maxnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) @@ -60,6 +108,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.maxnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -70,6 +129,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.maxnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) @@ -84,6 +154,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.maxnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -94,6 +175,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma 
+; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.maxnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) @@ -108,6 +200,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.maxnum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -118,6 +216,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.maxnum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) @@ -132,6 +236,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.maxnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -142,6 +252,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.maxnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) @@ -156,6 +272,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.maxnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -166,6 +288,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.maxnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) @@ -180,6 +308,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: 
ret %v = call <16 x float> @llvm.vp.maxnum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -190,6 +324,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.maxnum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) @@ -204,6 +344,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.maxnum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -214,6 +360,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.maxnum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) @@ -228,6 +380,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.maxnum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -238,6 +396,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.maxnum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) @@ -252,6 +416,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.maxnum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -262,6 +432,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.maxnum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 
%evl) @@ -276,6 +452,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v15f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v16, v0.t +; ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.maxnum.v15f64(<15 x double> %va, <15 x double> %vb, <15 x i1> %m, i32 %evl) ret <15 x double> %v } @@ -286,6 +468,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v15f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer %v = call <15 x double> @llvm.vp.maxnum.v15f64(<15 x double> %va, <15 x double> %vb, <15 x i1> %m, i32 %evl) @@ -300,6 +488,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v16, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.maxnum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -310,6 +504,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.maxnum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) @@ -367,6 +567,55 @@ ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v32f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v24, v0 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vle64.v v8, (a0) +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: addi a0, a0, 128 +; ZVFHMIN-NEXT: vle64.v v8, (a0) +; ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZVFHMIN-NEXT: addi a0, a2, -16 +; ZVFHMIN-NEXT: sltu a1, a2, a0 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: and a0, a1, a0 +; ZVFHMIN-NEXT: vslidedown.vi v0, v0, 2 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: li a0, 16 +; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8, v0.t +; ZVFHMIN-NEXT: bltu a2, a0, .LBB26_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: .LBB26_2: +; ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; 
ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.maxnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) ret <32 x double> %v } @@ -393,6 +642,28 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v16, v16, v24 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_vv_v32f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi a1, a0, 128 +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vle64.v v24, (a1) +; ZVFHMIN-NEXT: vle64.v v0, (a0) +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: mv a0, a2 +; ZVFHMIN-NEXT: bltu a2, a1, .LBB27_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a0, 16 +; ZVFHMIN-NEXT: .LBB27_2: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v0 +; ZVFHMIN-NEXT: addi a0, a2, -16 +; ZVFHMIN-NEXT: sltu a1, a2, a0 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: and a0, a1, a0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24 +; ZVFHMIN-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.maxnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) @@ -12,6 +16,17 @@ ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> 
@llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %v } @@ -22,6 +37,22 @@ ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f16_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x half> poison, half %b, i32 0 %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %splat) @@ -34,6 +65,22 @@ ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f16_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x half> poison, half %b, i32 0 %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %splat, <2 x half> %a) @@ -48,6 +95,17 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v4f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %a, <4 x half> %b) ret <4 x half> %v } @@ -58,6 +116,22 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v4f16_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x half> poison, half %b, i32 0 %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %a, <4 x half> %splat) @@ -70,6 +144,22 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; 
CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v4f16_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x half> poison, half %b, i32 0 %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %splat, <4 x half> %a) @@ -84,6 +174,17 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v8f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %a, <8 x half> %b) ret <8 x half> %v } @@ -94,6 +195,22 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v8f16_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x half> poison, half %b, i32 0 %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %a, <8 x half> %splat) @@ -106,6 +223,22 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v8f16_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x half> poison, half %b, i32 0 %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %splat, <8 x half> %a) @@ -120,6 +253,17 @@ ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v16f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli 
zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %a, <16 x half> %b) ret <16 x half> %v } @@ -130,6 +274,22 @@ ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v16f16_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %a, <16 x half> %splat) @@ -142,6 +302,22 @@ ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v16f16_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %splat, <16 x half> %a) @@ -156,6 +332,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f32_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) ret <2 x float> %v } @@ -166,6 +348,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f32_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x float> poison, float %b, i32 0 %splat = shufflevector <2 x float> %head, <2 x float> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %splat) @@ -178,6 +366,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f32_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x float> poison, float %b, 
i32 0 %splat = shufflevector <2 x float> %head, <2 x float> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %splat, <2 x float> %a) @@ -192,6 +386,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v4f32_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) ret <4 x float> %v } @@ -202,6 +402,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v4f32_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x float> poison, float %b, i32 0 %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %splat) @@ -214,6 +420,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v4f32_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x float> poison, float %b, i32 0 %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %splat, <4 x float> %a) @@ -228,6 +440,12 @@ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v8f32_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) ret <8 x float> %v } @@ -238,6 +456,12 @@ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v8f32_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %splat) @@ -250,6 +474,12 @@ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v8f32_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %splat, <8 x float> %a) @@ -264,6 +494,12 @@ ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v16f32_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) ret <16 x float> %v } @@ -274,6 +510,12 @@ ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v16f32_vf: +; ZVFHMIN: 
# %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %splat) @@ -286,6 +528,12 @@ ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v16f32_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %splat, <16 x float> %a) @@ -300,6 +548,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f64_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) ret <2 x double> %v } @@ -310,6 +564,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f64_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x double> poison, double %b, i32 0 %splat = shufflevector <2 x double> %head, <2 x double> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %splat) @@ -322,6 +582,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f64_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x double> poison, double %b, i32 0 %splat = shufflevector <2 x double> %head, <2 x double> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %splat, <2 x double> %a) @@ -336,6 +602,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v4f64_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %b) ret <4 x double> %v } @@ -346,6 +618,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v4f64_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %splat) @@ -358,6 +636,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v4f64_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x double> poison, double %b, 
i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %splat, <4 x double> %a) @@ -372,6 +656,12 @@ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v8f64_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %b) ret <8 x double> %v } @@ -382,6 +672,12 @@ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v8f64_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %splat) @@ -394,6 +690,12 @@ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v8f64_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %splat, <8 x double> %a) @@ -408,6 +710,12 @@ ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v16f64_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %b) ret <16 x double> %v } @@ -418,6 +726,12 @@ ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v16f64_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x double> poison, double %b, i32 0 %splat = shufflevector <16 x double> %head, <16 x double> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %splat) @@ -430,6 +744,12 @@ ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v16f64_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmax.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x double> poison, double %b, i32 0 %splat = shufflevector <16 x double> %head, <16 x double> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %splat, <16 x double> %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck 
%s -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN declare <2 x half> @llvm.vp.minnum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) @@ -12,6 +16,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.minnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -22,6 +37,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.minnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) @@ -36,6 +62,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.minnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -46,6 +83,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x 
i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.minnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) @@ -60,6 +108,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.minnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -70,6 +129,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.minnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) @@ -84,6 +154,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.minnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -94,6 +175,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.minnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) @@ -108,6 +200,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.minnum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -118,6 +216,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; 
ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.minnum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) @@ -132,6 +236,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.minnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -142,6 +252,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.minnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) @@ -156,6 +272,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.minnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -166,6 +288,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.minnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) @@ -180,6 +308,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.minnum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -190,6 +324,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.minnum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) @@ -204,6 +344,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> 
@llvm.vp.minnum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -214,6 +360,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.minnum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) @@ -228,6 +380,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.minnum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -238,6 +396,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.minnum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) @@ -252,6 +416,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.minnum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -262,6 +432,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.minnum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) @@ -276,6 +452,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v15f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v16, v0.t +; ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.minnum.v15f64(<15 x double> %va, <15 x double> %vb, <15 x i1> %m, i32 %evl) ret <15 x double> %v } @@ -286,6 +468,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v15f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer %v = call <15 x double> @llvm.vp.minnum.v15f64(<15 x double> %va, <15 x double> %vb, <15 x i1> %m, i32 %evl) @@ -300,6 
+488,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v16, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.minnum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -310,6 +504,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.minnum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) @@ -367,6 +567,55 @@ ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v32f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v24, v0 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vle64.v v8, (a0) +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: addi a0, a0, 128 +; ZVFHMIN-NEXT: vle64.v v8, (a0) +; ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZVFHMIN-NEXT: addi a0, a2, -16 +; ZVFHMIN-NEXT: sltu a1, a2, a0 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: and a0, a1, a0 +; ZVFHMIN-NEXT: vslidedown.vi v0, v0, 2 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: li a0, 16 +; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t +; ZVFHMIN-NEXT: bltu a2, a0, .LBB26_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: .LBB26_2: +; ZVFHMIN-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.minnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) ret <32 x double> %v } @@ -393,6 +642,28 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v16, v16, v24 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_v32f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi a1, a0, 128 +; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZVFHMIN-NEXT: vle64.v v24, (a1) +; ZVFHMIN-NEXT: vle64.v v0, (a0) +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: mv a0, a2 +; ZVFHMIN-NEXT: bltu a2, a1, .LBB27_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: 
li a0, 16 +; ZVFHMIN-NEXT: .LBB27_2: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v0 +; ZVFHMIN-NEXT: addi a0, a2, -16 +; ZVFHMIN-NEXT: sltu a1, a2, a0 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: and a0, a1, a0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24 +; ZVFHMIN-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.minnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) @@ -12,6 +16,17 @@ ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %v } @@ -22,6 +37,22 @@ ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f16_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x half> poison, half %b, i32 0 %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> 
%splat) @@ -34,6 +65,22 @@ ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f16_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x half> poison, half %b, i32 0 %splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.minnum.v2f16(<2 x half> %splat, <2 x half> %a) @@ -48,6 +95,17 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v4f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b) ret <4 x half> %v } @@ -58,6 +116,22 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v4f16_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x half> poison, half %b, i32 0 %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %splat) @@ -70,6 +144,22 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v4f16_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x half> poison, half %b, i32 0 %splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.minnum.v4f16(<4 x half> %splat, <4 x half> %a) @@ -84,6 +174,17 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; 
ZVFHMIN-LABEL: vfmin_v8f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.minnum.v8f16(<8 x half> %a, <8 x half> %b) ret <8 x half> %v } @@ -94,6 +195,22 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v8f16_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x half> poison, half %b, i32 0 %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.minnum.v8f16(<8 x half> %a, <8 x half> %splat) @@ -106,6 +223,22 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v8f16_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x half> poison, half %b, i32 0 %splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.minnum.v8f16(<8 x half> %splat, <8 x half> %a) @@ -120,6 +253,17 @@ ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v16f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.minnum.v16f16(<16 x half> %a, <16 x half> %b) ret <16 x half> %v } @@ -130,6 +274,22 @@ ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v16f16_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; 
ZVFHMIN-NEXT: vfmin.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.minnum.v16f16(<16 x half> %a, <16 x half> %splat) @@ -142,6 +302,22 @@ ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v16f16_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.minnum.v16f16(<16 x half> %splat, <16 x half> %a) @@ -156,6 +332,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f32_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) ret <2 x float> %v } @@ -166,6 +348,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f32_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x float> poison, float %b, i32 0 %splat = shufflevector <2 x float> %head, <2 x float> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %splat) @@ -178,6 +366,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f32_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x float> poison, float %b, i32 0 %splat = shufflevector <2 x float> %head, <2 x float> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.minnum.v2f32(<2 x float> %splat, <2 x float> %a) @@ -192,6 +386,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v4f32_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) ret <4 x float> %v } @@ -202,6 +402,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v4f32_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x float> poison, float %b, i32 0 %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> 
zeroinitializer %v = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %splat) @@ -214,6 +420,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v4f32_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x float> poison, float %b, i32 0 %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.minnum.v4f32(<4 x float> %splat, <4 x float> %a) @@ -228,6 +440,12 @@ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v8f32_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) ret <8 x float> %v } @@ -238,6 +456,12 @@ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v8f32_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %splat) @@ -250,6 +474,12 @@ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v8f32_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.minnum.v8f32(<8 x float> %splat, <8 x float> %a) @@ -264,6 +494,12 @@ ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v16f32_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) ret <16 x float> %v } @@ -274,6 +510,12 @@ ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v16f32_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %splat) @@ -286,6 +528,12 @@ ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v16f32_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.minnum.v16f32(<16 x float> %splat, <16 x float> %a) @@ -300,6 +548,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfmin.vv v8, 
v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f64_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) ret <2 x double> %v } @@ -310,6 +564,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f64_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x double> poison, double %b, i32 0 %splat = shufflevector <2 x double> %head, <2 x double> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %splat) @@ -322,6 +582,12 @@ ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f64_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x double> poison, double %b, i32 0 %splat = shufflevector <2 x double> %head, <2 x double> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.minnum.v2f64(<2 x double> %splat, <2 x double> %a) @@ -336,6 +602,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v4f64_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) ret <4 x double> %v } @@ -346,6 +618,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v4f64_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %splat) @@ -358,6 +636,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v4f64_fv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.minnum.v4f64(<4 x double> %splat, <4 x double> %a) @@ -372,6 +656,12 @@ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v8f64_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) ret <8 x double> %v } @@ -382,6 +672,12 @@ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v8f64_vf: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> 
poison, <8 x i32> zeroinitializer
 %v = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %splat)
@@ -394,6 +690,12 @@
 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT: vfmin.vf v8, v8, fa0
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v8f64_fv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0
+; ZVFHMIN-NEXT: ret
 %head = insertelement <8 x double> poison, double %b, i32 0
 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer
 %v = call <8 x double> @llvm.minnum.v8f64(<8 x double> %splat, <8 x double> %a)
@@ -408,6 +710,12 @@
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vfmin.vv v8, v8, v16
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v16f64_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v8, v8, v16
+; ZVFHMIN-NEXT: ret
 %v = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b)
 ret <16 x double> %v
 }
@@ -418,6 +726,12 @@
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vfmin.vf v8, v8, fa0
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v16f64_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0
+; ZVFHMIN-NEXT: ret
 %head = insertelement <16 x double> poison, double %b, i32 0
 %splat = shufflevector <16 x double> %head, <16 x double> poison, <16 x i32> zeroinitializer
 %v = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %splat)
@@ -430,6 +744,12 @@
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vfmin.vf v8, v8, fa0
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v16f64_fv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v8, v8, fa0
+; ZVFHMIN-NEXT: ret
 %head = insertelement <16 x double> poison, double %b, i32 0
 %splat = shufflevector <16 x double> %head, <16 x double> poison, <16 x i32> zeroinitializer
 %v = call <16 x double> @llvm.minnum.v16f64(<16 x double> %splat, <16 x double> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
@@ -1,8 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN

 declare <2 x half> @llvm.vp.fmul.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)

@@ -12,6 +16,17 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmul_vv_v2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -22,6 +37,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) @@ -34,6 +60,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) @@ -46,6 +88,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -62,6 +120,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v3f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret 
%v = call <3 x half> @llvm.vp.fmul.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) ret <3 x half> %v } @@ -74,6 +143,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -84,6 +164,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) @@ -96,6 +187,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) @@ -108,6 +215,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -124,6 +247,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v8f16: +; 
ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -134,6 +268,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) @@ -146,6 +291,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) @@ -158,6 +319,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -174,6 +351,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetivli 
zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -184,6 +372,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) @@ -196,6 +395,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) @@ -208,6 +423,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -224,6 +455,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -234,6 +471,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 
+; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl) @@ -246,6 +489,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) @@ -258,6 +507,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -274,6 +529,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -284,6 +545,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl) @@ -296,6 +563,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) @@ -308,6 +581,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -324,6 +603,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: 
vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -334,6 +619,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl) @@ -346,6 +637,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) @@ -358,6 +655,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -374,6 +677,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -384,6 +693,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl) @@ -396,6 +711,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) @@ -408,6 +729,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, 
a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -424,6 +751,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -434,6 +767,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl) @@ -446,6 +785,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) @@ -458,6 +803,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -474,6 +825,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -484,6 +841,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl) @@ -496,6 +859,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0, v0.t +; 
ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) @@ -508,6 +877,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -524,6 +899,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -534,6 +915,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl) @@ -546,6 +933,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) @@ -558,6 +951,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -574,6 +973,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v16, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.fmul.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -584,6 +989,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret %head = 
insertelement <16 x i1> poison, i1 true, i32 0
 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
 %v = call <16 x double> @llvm.vp.fmul.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl)
@@ -596,6 +1007,12 @@
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmul_vf_v16f64:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0, v0.t
+; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <16 x double> poison, double %b, i32 0
 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
 %v = call <16 x double> @llvm.vp.fmul.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
@@ -608,6 +1025,12 @@
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmul_vf_v16f64_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0
+; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <16 x double> poison, double %b, i32 0
 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
 %head = insertelement <16 x i1> poison, i1 true, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
@@ -1,8 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN

 declare <2 x half> @llvm.vp.fneg.v2f16(<2 x half>, <2 x i1>, i32)

@@ -12,6 +16,16 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfneg.v v8, v8, v0.t
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v9, v9, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
 ret <2 x half> %v
 }
@@ -22,6 +36,16 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfneg.v v8, v8
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v2f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v9, v9
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+;
ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) @@ -36,6 +60,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -46,6 +80,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v9, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) @@ -60,6 +104,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -70,6 +124,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v10, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -84,6 +148,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -94,6 +168,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v12, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -108,6 +192,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -118,6 +208,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -132,6 +228,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -142,6 +244,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -156,6 +264,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -166,6 +280,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -180,6 +300,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -190,6 +316,12 @@ ; 
CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -204,6 +336,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -214,6 +352,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -228,6 +372,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -238,6 +388,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -252,6 +408,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -262,6 +424,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -276,6 +444,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v15f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.fneg.v15f64(<15 x double> %va, <15 x i1> %m, 
i32 %evl)
 ret <15 x double> %v
 }
@@ -286,6 +460,12 @@
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfneg.v v8, v8
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v15f64_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v8, v8
+; ZVFHMIN-NEXT: ret
 %head = insertelement <15 x i1> poison, i1 true, i32 0
 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
 %v = call <15 x double> @llvm.vp.fneg.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
@@ -300,6 +480,12 @@
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfneg.v v8, v8, v0.t
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v16f64:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v8, v8, v0.t
+; ZVFHMIN-NEXT: ret
 %v = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
 ret <16 x double> %v
 }
@@ -310,6 +496,12 @@
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfneg.v v8, v8
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v16f64_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v8, v8
+; ZVFHMIN-NEXT: ret
 %head = insertelement <16 x i1> poison, i1 true, i32 0
 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
 %v = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
@@ -339,6 +531,27 @@
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vfneg.v v8, v8, v0.t
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v32f64:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vmv1r.v v24, v0
+; ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vi v0, v0, 2
+; ZVFHMIN-NEXT: addi a1, a0, -16
+; ZVFHMIN-NEXT: sltu a2, a0, a1
+; ZVFHMIN-NEXT: addi a2, a2, -1
+; ZVFHMIN-NEXT: and a1, a2, a1
+; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; ZVFHMIN-NEXT: li a1, 16
+; ZVFHMIN-NEXT: vfneg.v v16, v16, v0.t
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB26_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: li a0, 16
+; ZVFHMIN-NEXT: .LBB26_2:
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v0, v24
+; ZVFHMIN-NEXT: vfneg.v v8, v8, v0.t
+; ZVFHMIN-NEXT: ret
 %v = call <32 x double> @llvm.vp.fneg.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
 ret <32 x double> %v
 }
@@ -361,6 +574,24 @@
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfneg.v v16, v16
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v32f64_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: li a2, 16
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: li a1, 16
+; ZVFHMIN-NEXT: .LBB27_2:
+; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v8, v8
+; ZVFHMIN-NEXT: addi a1, a0, -16
+; ZVFHMIN-NEXT: sltu a0, a0, a1
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: and a0, a0, a1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v16, v16
+; ZVFHMIN-NEXT: ret
 %head = insertelement <32 x i1> poison, i1 true, i32 0
 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
 %v = call <32 x double> @llvm.vp.fneg.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll
@@ -1,8 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN

 declare <2 x half> @llvm.vp.sqrt.v2f16(<2 x half>, <2 x i1>, i32)

@@ -12,6 +16,16 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsqrt_vv_v2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfsqrt.v v9, v9, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
 ret <2 x half> %v
 }
@@ -22,6 +36,16 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfsqrt.v v8, v8
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsqrt_vv_v2f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfsqrt.v v9, v9
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %head = insertelement <2 x i1> poison, i1 true, i32 0
 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
 %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
@@ -36,6 +60,16 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsqrt_vv_v4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfsqrt.v v9, v9, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
 ret <4 x half> %v
 }
@@ -46,6 +80,16 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vfsqrt.v v8, v8
 ; CHECK-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsqrt_vv_v4f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfsqrt.v v9, v9
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %head = insertelement <4 x i1> poison, i1 true, i32 0
 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
 %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
@@ -60,6 +104,16 @@
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
 ;
CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -70,6 +124,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v10, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) @@ -84,6 +148,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -94,6 +168,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v12, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) @@ -108,6 +192,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.sqrt.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -118,6 +208,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.sqrt.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) @@ -132,6 +228,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, 
ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -142,6 +244,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) @@ -156,6 +264,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.sqrt.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -166,6 +280,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.sqrt.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) @@ -180,6 +300,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.sqrt.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -190,6 +316,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.sqrt.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) @@ -204,6 +336,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.sqrt.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -214,6 +352,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.sqrt.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) @@ -228,6 +372,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v4f64: +; 
ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.sqrt.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -238,6 +388,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.sqrt.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) @@ -252,6 +408,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.sqrt.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -262,6 +424,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.sqrt.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) @@ -276,6 +444,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v15f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <15 x double> @llvm.vp.sqrt.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v } @@ -286,6 +460,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v15f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer %v = call <15 x double> @llvm.vp.sqrt.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) @@ -300,6 +480,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.sqrt.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -310,6 +496,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.sqrt.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) @@ -339,6 +531,27 @@ ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: 
vfsqrt.v v8, v8, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v32f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v24, v0 +; ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZVFHMIN-NEXT: vslidedown.vi v0, v0, 2 +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a2, a0, a1 +; ZVFHMIN-NEXT: addi a2, a2, -1 +; ZVFHMIN-NEXT: and a1, a2, a1 +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: vfsqrt.v v16, v16, v0.t +; ZVFHMIN-NEXT: bltu a0, a1, .LBB26_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a0, 16 +; ZVFHMIN-NEXT: .LBB26_2: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v24 +; ZVFHMIN-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFHMIN-NEXT: ret %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v } @@ -361,6 +574,24 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v16, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_v32f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: li a2, 16 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB27_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: li a1, 16 +; ZVFHMIN-NEXT: .LBB27_2: +; ZVFHMIN-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v8, v8 +; ZVFHMIN-NEXT: addi a1, a0, -16 +; ZVFHMIN-NEXT: sltu a0, a0, a1 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v16, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN declare <2 x half> @llvm.vp.fsub.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) @@ -12,6 +16,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> 
%va, <2 x half> %b, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -22,6 +37,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) @@ -34,6 +60,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) @@ -46,6 +88,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -62,6 +120,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v3f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <3 x half> @llvm.vp.fsub.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) ret <3 x half> %v } @@ -74,6 +143,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 
4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -84,6 +164,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) @@ -96,6 +187,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) @@ -108,6 +215,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -124,6 +247,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 
+; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -134,6 +268,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) @@ -146,6 +291,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) @@ -158,6 +319,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -174,6 +351,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -184,6 +372,17 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; 
+; ZVFHMIN-LABEL: vfsub_vv_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) @@ -196,6 +395,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16, v0.t +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) @@ -208,6 +423,22 @@ ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -224,6 +455,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl) ret <2 x float> %v } @@ -234,6 +471,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl) @@ -246,6 +489,12 @@ ; CHECK-NEXT: vsetvli 
zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v2f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %v = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) @@ -258,6 +507,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v2f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -274,6 +529,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl) ret <4 x float> %v } @@ -284,6 +545,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl) @@ -296,6 +563,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer %v = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) @@ -308,6 +581,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v4f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -324,6 +603,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl) ret <8 x float> %v } @@ -334,6 +619,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v10 ; 
CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl) @@ -346,6 +637,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer %v = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) @@ -358,6 +655,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v8f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -374,6 +677,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl) ret <16 x float> %v } @@ -384,6 +693,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl) @@ -396,6 +711,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v16f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer %v = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) @@ -408,6 +729,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v16f32_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0 @@ -424,6 +751,12 @@ ; CHECK-NEXT: 
vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret %v = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl) ret <2 x double> %v } @@ -434,6 +767,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <2 x i1> poison, i1 true, i32 0 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl) @@ -446,6 +785,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v2f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer %v = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) @@ -458,6 +803,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v2f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer %head = insertelement <2 x i1> poison, i1 true, i32 0 @@ -474,6 +825,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret %v = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl) ret <4 x double> %v } @@ -484,6 +841,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <4 x i1> poison, i1 true, i32 0 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl) @@ -496,6 +859,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v4f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer %v = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) @@ -508,6 +877,12 @@ ; CHECK-NEXT: vsetvli zero, a0, 
e64, m2, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v4f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer %head = insertelement <4 x i1> poison, i1 true, i32 0 @@ -524,6 +899,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret %v = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl) ret <8 x double> %v } @@ -534,6 +915,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <8 x i1> poison, i1 true, i32 0 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl) @@ -546,6 +933,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer %v = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) @@ -558,6 +951,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v8f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer %head = insertelement <8 x i1> poison, i1 true, i32 0 @@ -574,6 +973,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v16, v0.t +; ZVFHMIN-NEXT: ret %v = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl) ret <16 x double> %v } @@ -584,6 +989,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v16 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <16 x i1> poison, i1 true, i32 0 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl) @@ -596,6 +1007,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, 
fa0, v0.t ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v16f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0, v0.t +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer %v = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) @@ -608,6 +1025,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_v16f64_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0 +; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer %head = insertelement <16 x i1> poison, i1 true, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -1,47 +1,91 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
declare <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) define <vscale x 1 x half> @vp_nearbyint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x half> %v }
define <vscale x 1 x half> @vp_nearbyint_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer %v = call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl) @@ -51,41 +95,81 @@
declare <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32) define <vscale x 2 x half> @vp_nearbyint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x half> %v }
define <vscale x 2 x half> @vp_nearbyint_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer %v = call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl) @@ -95,41 +179,83 @@
declare <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32) define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x half> %v }
define <vscale x 4 x half> @vp_nearbyint_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer %v = call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl) @@ -139,43 +265,85 @@
declare <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32) define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI6_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x half> %v }
define <vscale x 8 x half> @vp_nearbyint_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI7_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer %v = call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl) @@ -185,43 +353,85 @@
declare <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32) define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI8_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v }
define <vscale x 16 x half> @vp_nearbyint_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI9_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer %v = call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl) @@ -231,43 +441,178 @@
declare <vscale x 32 x half> @llvm.vp.nearbyint.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32) define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI10_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11,
0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v1, v0 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: lui a2, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: frflags a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v1, v16, fa5, v0.t +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv32f16( %va, %m, i32 %evl) ret %v } define @vp_nearbyint_nxv32f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_nearbyint_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI11_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_nearbyint_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: fsflags a0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16_unmasked: +; ZVFHMIN: # 
%bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v1 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v17, v1, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: lui a2, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: frflags a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v1, v16, fa5, v0.t +; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.nearbyint.nxv32f16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -1,43 +1,83 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d 
\ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.rint.nxv1f16(, , i32) define @vp_rint_nxv1f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv1f16( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv1f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: 
vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.rint.nxv1f16( %va, %m, i32 %evl) @@ -47,37 +87,73 @@ declare @llvm.vp.rint.nxv2f16(, , i32) define @vp_rint_nxv2f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv2f16( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv2f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; 
ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.rint.nxv2f16( %va, %m, i32 %evl) @@ -87,37 +163,75 @@ declare @llvm.vp.rint.nxv4f16(, , i32) define @vp_rint_nxv4f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv4f16( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv4f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: 
vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.rint.nxv4f16( %va, %m, i32 %evl) @@ -127,39 +241,77 @@ declare @llvm.vp.rint.nxv8f16(, , i32) define @vp_rint_nxv8f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI6_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv8f16( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv8f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI7_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; 
CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.rint.nxv8f16( %va, %m, i32 %evl) @@ -169,39 +321,77 @@ declare @llvm.vp.rint.nxv16f16(, , i32) define @vp_rint_nxv16f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI8_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv16f16( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv16f16_unmasked( %va, 
i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI9_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.rint.nxv16f16( %va, %m, i32 %evl) @@ -211,39 +401,168 @@ declare @llvm.vp.rint.nxv32f16(, , i32) define @vp_rint_nxv32f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI10_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; 
ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: lui a2, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv32f16( %va, %m, i32 %evl) ret %v } define @vp_rint_nxv32f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_rint_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI11_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_rint_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_rint_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 +; 
ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v17, v16, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: lui a2, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.rint.nxv32f16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -1,47 +1,91 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.round.nxv1f16(, , i32) define @vp_round_nxv1f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a1) -; 
CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv1f16( %va, %m, i32 %evl) ret %v } define @vp_round_nxv1f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t 
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.round.nxv1f16( %va, %m, i32 %evl) @@ -51,41 +95,81 @@ declare @llvm.vp.round.nxv2f16(, , i32) define @vp_round_nxv2f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv2f16( %va, %m, i32 %evl) ret %v } define @vp_round_nxv2f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; 
ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.round.nxv2f16( %va, %m, i32 %evl) @@ -95,41 +179,83 @@ declare @llvm.vp.round.nxv4f16(, , i32) define @vp_round_nxv4f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv4f16( %va, %m, i32 %evl) ret %v } define @vp_round_nxv4f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; 
CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.round.nxv4f16( %va, %m, i32 %evl) @@ -139,43 +265,85 @@ declare @llvm.vp.round.nxv8f16(, , i32) define @vp_round_nxv8f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI6_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; 
ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv8f16( %va, %m, i32 %evl) ret %v } define @vp_round_nxv8f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI7_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.round.nxv8f16( %va, %m, i32 %evl) @@ -185,43 +353,85 @@ declare @llvm.vp.round.nxv16f16(, , i32) define @vp_round_nxv16f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI8_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv16f16: +; 
ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv16f16( %va, %m, i32 %evl) ret %v } define @vp_round_nxv16f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI9_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.round.nxv16f16( %va, %m, i32 %evl) @@ -231,43 +441,180 @@ declare @llvm.vp.round.nxv32f16(, , i32) define @vp_round_nxv32f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI10_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv 
v8, v24, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: lui a2, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a2, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv32f16( %va, %m, i32 %evl) ret %v } define @vp_round_nxv32f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_round_nxv32f16_unmasked: -; CHECK: # 
%bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI11_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_round_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_round_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v17, v16, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: lui a2, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a2, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: csrr a0, 
vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.round.nxv32f16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -1,47 +1,91 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.roundeven.nxv1f16(, , i32) define @vp_roundeven_nxv1f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv1f16( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv1f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: 
vp_roundeven_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundeven.nxv1f16( %va, %m, i32 %evl) @@ -51,41 +95,81 @@ declare @llvm.vp.roundeven.nxv2f16(, , i32) define @vp_roundeven_nxv2f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, 
v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv2f16( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv2f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundeven.nxv2f16( %va, %m, i32 %evl) @@ -95,41 +179,83 @@ declare @llvm.vp.roundeven.nxv4f16(, , i32) define @vp_roundeven_nxv4f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; 
ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv4f16( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv4f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundeven.nxv4f16( %va, %m, i32 %evl) @@ -139,43 +265,85 @@ declare @llvm.vp.roundeven.nxv8f16(, , i32) define @vp_roundeven_nxv8f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI6_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; 
CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv8f16( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv8f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI7_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma 
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundeven.nxv8f16( %va, %m, i32 %evl) @@ -185,43 +353,85 @@ declare @llvm.vp.roundeven.nxv16f16(, , i32) define @vp_roundeven_nxv16f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI8_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv16f16( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv16f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI9_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: 
vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundeven.nxv16f16( %va, %m, i32 %evl) @@ -231,43 +441,180 @@ declare @llvm.vp.roundeven.nxv32f16(, , i32) define @vp_roundeven_nxv32f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI10_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: lui a2, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, 
a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a2, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv32f16( %va, %m, i32 %evl) ret %v } define @vp_roundeven_nxv32f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundeven_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI11_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundeven_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundeven_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v17, v16, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, 
ta, ma +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: lui a2, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a2, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundeven.nxv32f16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -1,47 +1,91 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.roundtozero.nxv1f16(, , i32) define @vp_roundtozero_nxv1f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; 
CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundtozero_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv1f16( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv1f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundtozero_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = 
shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundtozero.nxv1f16( %va, %m, i32 %evl) @@ -51,41 +95,81 @@ declare @llvm.vp.roundtozero.nxv2f16(, , i32) define @vp_roundtozero_nxv2f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundtozero_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv2f16( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv2f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundtozero_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; 
ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v9 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundtozero.nxv2f16( %va, %m, i32 %evl) @@ -95,41 +179,83 @@ declare @llvm.vp.roundtozero.nxv4f16(, , i32) define @vp_roundtozero_nxv4f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundtozero_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv4f16( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv4f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: 
vp_roundtozero_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v9, v8 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v10 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundtozero.nxv4f16( %va, %m, i32 %evl) @@ -139,43 +265,85 @@ declare @llvm.vp.roundtozero.nxv8f16(, , i32) define @vp_roundtozero_nxv8f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI6_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v12, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundtozero_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v10, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, 
v16, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv8f16( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv8f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI7_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundtozero_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v10, v8 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v12 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundtozero.nxv8f16( %va, %m, i32 %evl) @@ -185,43 +353,85 @@ declare @llvm.vp.roundtozero.nxv16f16(, , i32) define @vp_roundtozero_nxv16f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI8_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI8_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundtozero_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv 
v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v12, v0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv16f16( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv16f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI9_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI9_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundtozero_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v12, v8 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v16 +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.roundtozero.nxv16f16( %va, %m, i32 %evl) @@ -231,43 +441,180 @@ declare @llvm.vp.roundtozero.nxv32f16(, , i32) define @vp_roundtozero_nxv32f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI10_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; 
CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundtozero_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: lui a2, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a2, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call 
@llvm.vp.roundtozero.nxv32f16( %va, %m, i32 %evl) ret %v } define @vp_roundtozero_nxv32f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vp_roundtozero_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI11_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI11_0)(a1) -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_roundtozero_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1) +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v16, v8 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t +; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_roundtozero_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v17, v16, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: lui a2, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a2 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a2, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t +; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, 
ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 %v = call <vscale x 32 x half> @llvm.vp.roundtozero.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
@@ -1,27 +1,53 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

 declare <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)

 define <vscale x 1 x half> @vfsgnj_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfsgnj_vv_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vfsgnj.vv v8, v8, v9, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsgnj_vv_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
 ret <vscale x 1 x half> %v
 }

 define <vscale x 1 x half> @vfsgnj_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfsgnj_vv_nxv1f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vfsgnj.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsgnj_vv_nxv1f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv1f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 %v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
@@ -31,21 +57,43 @@
 declare <vscale x 2 x half> @llvm.vp.copysign.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x i1>, i32)

 define <vscale x 2 x half> @vfsgnj_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfsgnj_vv_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli
zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsgnj_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.copysign.nxv2f16( %va, %vb, %m, i32 %evl) ret %v } define @vfsgnj_vv_nxv2f16_unmasked( %va, %vb, i32 zeroext %evl) { -; CHECK-LABEL: vfsgnj_vv_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsgnj_vv_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.copysign.nxv2f16( %va, %vb, %m, i32 %evl) @@ -55,21 +103,43 @@ declare @llvm.vp.copysign.nxv4f16(, , , i32) define @vfsgnj_vv_nxv4f16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsgnj_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsgnj_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.copysign.nxv4f16( %va, %vb, %m, i32 %evl) ret %v } define @vfsgnj_vv_nxv4f16_unmasked( %va, %vb, i32 zeroext %evl) { -; CHECK-LABEL: vfsgnj_vv_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsgnj_vv_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.copysign.nxv4f16( %va, %vb, %m, i32 %evl) @@ -79,21 +149,43 @@ 
declare @llvm.vp.copysign.nxv8f16(, , , i32) define @vfsgnj_vv_nxv8f16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsgnj_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v10, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsgnj_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v10, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.copysign.nxv8f16( %va, %vb, %m, i32 %evl) ret %v } define @vfsgnj_vv_nxv8f16_unmasked( %va, %vb, i32 zeroext %evl) { -; CHECK-LABEL: vfsgnj_vv_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsgnj_vv_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.copysign.nxv8f16( %va, %vb, %m, i32 %evl) @@ -103,21 +195,43 @@ declare @llvm.vp.copysign.nxv16f16(, , , i32) define @vfsgnj_vv_nxv16f16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsgnj_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v12, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsgnj_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v12, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.copysign.nxv16f16( %va, %vb, %m, i32 %evl) ret %v } define @vfsgnj_vv_nxv16f16_unmasked( %va, %vb, i32 zeroext %evl) { -; CHECK-LABEL: vfsgnj_vv_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsgnj_vv_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, 
v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.copysign.nxv16f16( %va, %vb, %m, i32 %evl) @@ -127,21 +241,89 @@ declare @llvm.vp.copysign.nxv32f16(, , , i32) define @vfsgnj_vv_nxv32f16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsgnj_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v16, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsgnj_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v16, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a2, a1, 1 +; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB10_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vmv4r.v v4, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: sub a2, a0, a2 +; ZVFHMIN-NEXT: sltu a0, a0, a2 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: srli a1, a1, 2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.copysign.nxv32f16( %va, %vb, %m, i32 %evl) ret %v } define @vfsgnj_vv_nxv32f16_unmasked( %va, %vb, i32 zeroext %evl) { -; CHECK-LABEL: vfsgnj_vv_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsgnj_vv_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a2, a1, 1 +; ZVFHMIN-NEXT: vmset.m v0 +; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB11_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vmv4r.v v4, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: sub a2, a0, a2 +; ZVFHMIN-NEXT: sltu a0, a0, a2 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: srli a1, a1, 2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, 
ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 %v = call <vscale x 32 x half> @llvm.vp.copysign.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
@@ -1,17 +1,31 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

 declare <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half>)

 define <vscale x 1 x half> @vfabs_nxv1f16(<vscale x 1 x half> %v) {
-; CHECK-LABEL: vfabs_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfabs.v v8, v8
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfabs_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v9, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %r = call <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half> %v)
 ret <vscale x 1 x half> %r
 }
@@ -19,11 +33,21 @@
 declare <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half>)

 define <vscale x 2 x half> @vfabs_nxv2f16(<vscale x 2 x half> %v) {
-; CHECK-LABEL: vfabs_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v8
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfabs_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v9, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %r = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %v)
 ret <vscale x 2 x half> %r
 }
@@ -31,11 +55,21 @@
 declare <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half>)

 define <vscale x 4 x half> @vfabs_nxv4f16(<vscale x 4 x half> %v) {
-; CHECK-LABEL: vfabs_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v8, v8
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfabs_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
 %r = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %v)
 ret <vscale x 4 x half> %r
 }
@@ -43,11 +77,21 @@
 declare <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half>)

 define
@vfabs_nxv8f16( %v) { -; CHECK-LABEL: vfabs_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %r = call @llvm.fabs.nxv8f16( %v) ret %r } @@ -55,11 +99,21 @@ declare @llvm.fabs.nxv16f16() define @vfabs_nxv16f16( %v) { -; CHECK-LABEL: vfabs_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %r = call @llvm.fabs.nxv16f16( %v) ret %r } @@ -67,11 +121,26 @@ declare @llvm.fabs.nxv32f16() define @vfabs_nxv32f16( %v) { -; CHECK-LABEL: vfabs_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %r = call @llvm.fabs.nxv32f16( %v) ret %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll @@ -1,27 +1,51 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.fabs.nxv1f16(, , i32) define @vfabs_vv_nxv1f16( %va, %m, i32 zeroext %evl) { -; 
CHECK-LABEL: vfabs_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fabs.nxv1f16( %va, %m, i32 %evl) ret %v } define @vfabs_vv_nxv1f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfabs_vv_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v9 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fabs.nxv1f16( %va, %m, i32 %evl) @@ -31,21 +55,41 @@ declare @llvm.vp.fabs.nxv2f16(, , i32) define @vfabs_vv_nxv2f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfabs_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fabs.nxv2f16( %va, %m, i32 %evl) ret %v } define @vfabs_vv_nxv2f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfabs_vv_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v9, v9 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fabs.nxv2f16( %va, %m, i32 %evl) @@ -55,21 +99,41 @@ declare @llvm.vp.fabs.nxv4f16(, , i32) define @vfabs_vv_nxv4f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfabs_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v8, v8, v0.t 
-; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fabs.nxv4f16( %va, %m, i32 %evl) ret %v } define @vfabs_vv_nxv4f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfabs_vv_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v10, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fabs.nxv4f16( %va, %m, i32 %evl) @@ -79,21 +143,41 @@ declare @llvm.vp.fabs.nxv8f16(, , i32) define @vfabs_vv_nxv8f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfabs_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fabs.nxv8f16( %va, %m, i32 %evl) ret %v } define @vfabs_vv_nxv8f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfabs_vv_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfabs.v v12, v12 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fabs.nxv8f16( %va, %m, i32 %evl) @@ -103,21 +187,41 @@ declare @llvm.vp.fabs.nxv16f16(, , i32) define @vfabs_vv_nxv16f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfabs_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; 
ZVFH-NEXT: vfabs.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fabs.nxv16f16( %va, %m, i32 %evl) ret %v } define @vfabs_vv_nxv16f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfabs_vv_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v16 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fabs.nxv16f16( %va, %m, i32 %evl) @@ -127,21 +231,82 @@ declare @llvm.vp.fabs.nxv32f16(, , i32) define @vfabs_vv_nxv32f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfabs_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fabs.nxv32f16( %va, %m, i32 %evl) ret %v } define @vfabs_vv_nxv32f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfabs_vv_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfabs_vv_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfabs.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu 
a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fabs.nxv32f16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll @@ -1,25 +1,55 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN define @vfadd_vv_nxv1f16( %va, %vb) { -; CHECK-LABEL: vfadd_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %vc = fadd %va, %vb ret %vc } define @vfadd_vf_nxv1f16( %va, half %b) { -; CHECK-LABEL: vfadd_vf_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, 
ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fadd %va, %splat @@ -27,21 +57,47 @@ } define @vfadd_vv_nxv2f16( %va, %vb) { -; CHECK-LABEL: vfadd_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %vc = fadd %va, %vb ret %vc } define @vfadd_vf_nxv2f16( %va, half %b) { -; CHECK-LABEL: vfadd_vf_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fadd %va, %splat @@ -49,21 +105,47 @@ } define @vfadd_vv_nxv4f16( %va, %vb) { -; CHECK-LABEL: vfadd_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %vc = fadd %va, %vb ret %vc } define @vfadd_vf_nxv4f16( %va, half %b) { -; CHECK-LABEL: vfadd_vf_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fadd %va, %splat @@ -71,21 +153,47 @@ } define @vfadd_vv_nxv8f16( %va, %vb) { -; CHECK-LABEL: vfadd_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %vc = fadd %va, %vb ret %vc } define @vfadd_vf_nxv8f16( %va, half %b) { -; CHECK-LABEL: vfadd_vf_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fadd %va, %splat @@ -93,11 +201,26 @@ } define @vfadd_fv_nxv8f16( %va, half %b) { -; CHECK-LABEL: vfadd_fv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_fv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_fv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fadd %splat, %va @@ -105,21 +228,47 @@ } define @vfadd_vv_nxv16f16( %va, %vb) { -; CHECK-LABEL: vfadd_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: 
vfadd.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %vc = fadd %va, %vb ret %vc } define @vfadd_vf_nxv16f16( %va, half %b) { -; CHECK-LABEL: vfadd_vf_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fadd %va, %splat @@ -127,21 +276,58 @@ } define @vfadd_vv_nxv32f16( %va, %vb) { -; CHECK-LABEL: vfadd_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v24, v0, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %vc = fadd %va, %vb ret %vc } define @vfadd_vf_nxv32f16( %va, half %b) { -; CHECK-LABEL: vfadd_vf_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: 
vfadd.vv v16, v16, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fadd %va, %splat diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -1,27 +1,53 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.fadd.nxv1f16(, , , i32) define @vfadd_vv_nxv1f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fadd.nxv1f16( %va, %b, %m, i32 %evl) ret %v } define @vfadd_vv_nxv1f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv1f16( %va, %b, %m, i32 %evl) @@ -29,11 +55,26 @@ } define @vfadd_vf_nxv1f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv1f16: +; ZVFHMIN: # %bb.0: +; 
ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv1f16( %va, %vb, %m, i32 %evl) @@ -41,11 +82,26 @@ } define @vfadd_vf_nxv1f16_commute( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv1f16_commute: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv1f16_commute: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv1f16_commute: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv1f16( %vb, %va, %m, i32 %evl) @@ -53,11 +109,26 @@ } define @vfadd_vf_nxv1f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -67,11 +138,26 @@ } define @vfadd_vf_nxv1f16_unmasked_commute( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv1f16_unmasked_commute: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv1f16_unmasked_commute: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv1f16_unmasked_commute: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; 
ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v8, v9 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -83,21 +169,43 @@ declare @llvm.vp.fadd.nxv2f16(, , , i32) define @vfadd_vv_nxv2f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fadd.nxv2f16( %va, %b, %m, i32 %evl) ret %v } define @vfadd_vv_nxv2f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv2f16( %va, %b, %m, i32 %evl) @@ -105,11 +213,26 @@ } define @vfadd_vf_nxv2f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv2f16( %va, %vb, %m, i32 %evl) @@ -117,11 +240,26 @@ } define 
@vfadd_vf_nxv2f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -133,21 +271,43 @@ declare @llvm.vp.fadd.nxv4f16(, , , i32) define @vfadd_vv_nxv4f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fadd.nxv4f16( %va, %b, %m, i32 %evl) ret %v } define @vfadd_vv_nxv4f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv4f16( %va, %b, %m, i32 %evl) @@ -155,11 +315,26 @@ } define @vfadd_vf_nxv4f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, 
zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv4f16( %va, %vb, %m, i32 %evl) @@ -167,11 +342,26 @@ } define @vfadd_vf_nxv4f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -183,21 +373,43 @@ declare @llvm.vp.fadd.nxv8f16(, , , i32) define @vfadd_vv_nxv8f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v10, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v10, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fadd.nxv8f16( %va, %b, %m, i32 %evl) ret %v } define @vfadd_vv_nxv8f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv8f16( %va, %b, %m, i32 %evl) @@ -205,11 +417,26 @@ } define @vfadd_vf_nxv8f16( %va, half %b, %m, i32 zeroext 
%evl) { -; CHECK-LABEL: vfadd_vf_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv8f16( %va, %vb, %m, i32 %evl) @@ -217,11 +444,26 @@ } define @vfadd_vf_nxv8f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -233,21 +475,43 @@ declare @llvm.vp.fadd.nxv16f16(, , , i32) define @vfadd_vv_nxv16f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v12, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v12, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fadd.nxv16f16( %va, %b, %m, i32 %evl) ret %v } define @vfadd_vv_nxv16f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; 
ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv16f16( %va, %b, %m, i32 %evl) @@ -255,11 +519,26 @@ } define @vfadd_vf_nxv16f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv16f16( %va, %vb, %m, i32 %evl) @@ -267,11 +546,26 @@ } define @vfadd_vf_nxv16f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -283,21 +577,89 @@ declare @llvm.vp.fadd.nxv32f16(, , , i32) define @vfadd_vv_nxv32f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v16, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v16, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a2, a1, 1 +; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB22_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: .LBB22_2: +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; 
ZVFHMIN-NEXT: vmv4r.v v4, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: sub a2, a0, a2 +; ZVFHMIN-NEXT: sltu a0, a0, a2 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: srli a1, a1, 2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fadd.nxv32f16( %va, %b, %m, i32 %evl) ret %v } define @vfadd_vv_nxv32f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vv_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vv_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfadd.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vv_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a2, a1, 1 +; ZVFHMIN-NEXT: vmset.m v0 +; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB23_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: .LBB23_2: +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vmv4r.v v4, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: sub a2, a0, a2 +; ZVFHMIN-NEXT: sltu a0, a0, a2 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: srli a1, a1, 2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv32f16( %va, %b, %m, i32 %evl) @@ -305,11 +667,47 @@ } define @vfadd_vf_nxv32f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v1, v0 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub 
a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB24_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB24_2: +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fadd.nxv32f16( %va, %vb, %m, i32 %evl) @@ -317,11 +715,48 @@ } define @vfadd_vf_nxv32f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfadd_vf_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfadd_vf_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfadd.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfadd_vf_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v1 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v1, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB25_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB25_2: +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll @@ -1,27 +1,57 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | 
FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.copysign.nxv1f16(, ) define @vfcopysign_vv_nxv1f16( %vm, %vs) { -; CHECK-LABEL: vfcopysign_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %r = call @llvm.copysign.nxv1f16( %vm, %vs) ret %r } define @vfcopysign_vf_nxv1f16( %vm, half %s) { -; CHECK-LABEL: vfcopysign_vf_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vf_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vf_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv1f16( %vm, %splat) @@ -29,22 +59,58 @@ } define @vfcopynsign_vv_nxv1f16( %vm, %vs) { -; CHECK-LABEL: vfcopynsign_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %n = fneg %vs %r = call @llvm.copysign.nxv1f16( %vm, %n) ret %r } 
define @vfcopynsign_vf_nxv1f16( %vm, half %s) { -; CHECK-LABEL: vfcopynsign_vf_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vf_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfsgnjn.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vf_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v9, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -53,26 +119,52 @@ } define @vfcopysign_exttrunc_vv_nxv1f16_nxv1f32( %vm, %vs) { -; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v9 -; CHECK-NEXT: vfsgnj.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v9 +; ZVFH-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %e = fptrunc %vs to %r = call @llvm.copysign.nxv1f16( %vm, %e) ret %r } define @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32( %vm, float %s) { -; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v9 -; CHECK-NEXT: vfsgnj.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFH-NEXT: vfmv.v.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v9 +; ZVFH-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, float %s, i32 0 %splat = 
shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -81,12 +173,29 @@ } define @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32( %vm, %vs) { -; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v9 -; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v9 +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %n = fneg %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv1f16( %vm, %eneg) @@ -94,14 +203,33 @@ } define @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32( %vm, float %s) { -; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v9 -; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFH-NEXT: vfmv.v.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v9 +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, float %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -111,30 +239,60 @@ } define @vfcopysign_exttrunc_vv_nxv1f16_nxv1f64( %vm, %vs) { -; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfncvt.rod.f.f.w v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v9, v10 -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFH-NEXT: vfncvt.rod.f.f.w v10, v9 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v9, v10 +; ZVFH-NEXT: 
vfsgnj.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %e = fptrunc %vs to %r = call @llvm.copysign.nxv1f16( %vm, %e) ret %r } define @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64( %vm, double %s) { -; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfncvt.rod.f.f.w v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v9, v10 -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; ZVFH-NEXT: vfmv.v.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFH-NEXT: vfncvt.rod.f.f.w v10, v9 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v9, v10 +; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -143,14 +301,34 @@ } define @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64( %vm, %vs) { -; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfncvt.rod.f.f.w v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v9, v10 -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFH-NEXT: vfncvt.rod.f.f.w v10, v9 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v9, v10 +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %n = fneg %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv1f16( %vm, %eneg) @@ -158,16 +336,38 @@ } define @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64( %vm, double %s) { -; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfncvt.rod.f.f.w v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v9, v10 -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; ZVFH-NEXT: vfmv.v.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFH-NEXT: vfncvt.rod.f.f.w v10, v9 +; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v9, v10 +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -179,21 +379,47 @@ declare @llvm.copysign.nxv2f16(, ) define @vfcopysign_vv_nxv2f16( %vm, %vs) { -; CHECK-LABEL: vfcopysign_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %r = call @llvm.copysign.nxv2f16( %vm, %vs) ret %r } define @vfcopysign_vf_nxv2f16( %vm, half %s) { -; CHECK-LABEL: vfcopysign_vf_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vf_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vf_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: 
vsetvli a0, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv2f16( %vm, %splat) @@ -201,22 +427,58 @@ } define @vfcopynsign_vv_nxv2f16( %vm, %vs) { -; CHECK-LABEL: vfcopynsign_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %n = fneg %vs %r = call @llvm.copysign.nxv2f16( %vm, %n) ret %r } define @vfcopynsign_vf_nxv2f16( %vm, half %s) { -; CHECK-LABEL: vfcopynsign_vf_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vf_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfsgnjn.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vf_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v9, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -227,21 +489,47 @@ declare @llvm.copysign.nxv4f16(, ) define @vfcopysign_vv_nxv4f16( %vm, %vs) { -; CHECK-LABEL: vfcopysign_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli 
zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %r = call @llvm.copysign.nxv4f16( %vm, %vs) ret %r } define @vfcopysign_vf_nxv4f16( %vm, half %s) { -; CHECK-LABEL: vfcopysign_vf_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vf_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vf_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv4f16( %vm, %splat) @@ -249,22 +537,58 @@ } define @vfcopynsign_vv_nxv4f16( %vm, %vs) { -; CHECK-LABEL: vfcopynsign_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %n = fneg %vs %r = call @llvm.copysign.nxv4f16( %vm, %n) ret %r } define @vfcopynsign_vf_nxv4f16( %vm, half %s) { -; CHECK-LABEL: vfcopynsign_vf_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vf_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfsgnjn.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vf_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement 
poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -275,21 +599,47 @@ declare @llvm.copysign.nxv8f16(, ) define @vfcopysign_vv_nxv8f16( %vm, %vs) { -; CHECK-LABEL: vfcopysign_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %r = call @llvm.copysign.nxv8f16( %vm, %vs) ret %r } define @vfcopysign_vf_nxv8f16( %vm, half %s) { -; CHECK-LABEL: vfcopysign_vf_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vf_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vf_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv8f16( %vm, %splat) @@ -297,22 +647,58 @@ } define @vfcopynsign_vv_nxv8f16( %vm, %vs) { -; CHECK-LABEL: vfcopynsign_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v12, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %n = fneg %vs %r = call @llvm.copysign.nxv8f16( %vm, %n) ret %r } define @vfcopynsign_vf_nxv8f16( %vm, half %s) { -; CHECK-LABEL: vfcopynsign_vf_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vf_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfsgnjn.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vf_nxv8f16: +; ZVFHMIN: # 
%bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v12, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -321,26 +707,52 @@ } define @vfcopysign_exttrunc_vv_nxv8f16_nxv8f32( %vm, %vs) { -; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v12 -; CHECK-NEXT: vfsgnj.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v12 +; ZVFH-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %e = fptrunc %vs to %r = call @llvm.copysign.nxv8f16( %vm, %e) ret %r } define @vfcopysign_exttrunc_vf_nxv8f16_nxv8f32( %vm, float %s) { -; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v12 -; CHECK-NEXT: vfsgnj.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFH-NEXT: vfmv.v.f v12, fa0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v12 +; ZVFH-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, float %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -349,12 +761,29 @@ } define @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32( %vm, %vs) { -; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v12 -; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: 
vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v12 +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %n = fneg %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv8f16( %vm, %eneg) @@ -362,14 +791,33 @@ } define @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32( %vm, float %s) { -; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v12 -; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFH-NEXT: vfmv.v.f v12, fa0 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v12 +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, float %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -379,30 +827,60 @@ } define @vfcopysign_exttrunc_vv_nxv8f16_nxv8f64( %vm, %vs) { -; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.rod.f.f.w v12, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v12 -; CHECK-NEXT: vfsgnj.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFH-NEXT: vfncvt.rod.f.f.w v12, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v12 +; ZVFH-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %e = fptrunc %vs to %r = call @llvm.copysign.nxv8f16( %vm, %e) ret %r } define @vfcopysign_exttrunc_vf_nxv8f16_nxv8f64( %vm, double %s) { -; CHECK-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.rod.f.f.w v12, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v12 -; CHECK-NEXT: vfsgnj.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; ZVFH-NEXT: vfmv.v.f v16, fa0 +; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFH-NEXT: vfncvt.rod.f.f.w v12, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v12 +; ZVFH-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -411,14 +889,34 @@ } define @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64( %vm, %vs) { -; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.rod.f.f.w v12, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v12 -; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFH-NEXT: vfncvt.rod.f.f.w v12, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v12 +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %n = fneg %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv8f16( %vm, %eneg) @@ -426,16 +924,38 @@ } define @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64( 
%vm, double %s) { -; CHECK-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.rod.f.f.w v12, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v10, v12 -; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; ZVFH-NEXT: vfmv.v.f v16, fa0 +; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFH-NEXT: vfncvt.rod.f.f.w v12, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfncvt.f.f.w v10, v12 +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v8, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -447,21 +967,47 @@ declare @llvm.copysign.nxv16f16(, ) define @vfcopysign_vv_nxv16f16( %vm, %vs) { -; CHECK-LABEL: vfcopysign_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %r = call @llvm.copysign.nxv16f16( %vm, %vs) ret %r } define @vfcopysign_vf_nxv16f16( %vm, half %s) { -; CHECK-LABEL: vfcopysign_vf_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vf_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vf_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, 
zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv16f16( %vm, %splat) @@ -469,22 +1015,58 @@ } define @vfcopynsign_vv_nxv16f16( %vm, %vs) { -; CHECK-LABEL: vfcopynsign_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfsgnjn.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfneg.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %n = fneg %vs %r = call @llvm.copysign.nxv16f16( %vm, %n) ret %r } define @vfcopynsign_vf_nxv16f16( %vm, half %s) { -; CHECK-LABEL: vfcopynsign_vf_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vf_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfsgnjn.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vf_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfneg.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -495,21 +1077,58 @@ declare @llvm.copysign.nxv32f16(, ) define @vfcopysign_vv_nxv32f16( %vm, %vs) { -; CHECK-LABEL: vfcopysign_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfsgnj.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v24, v0, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, 
v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %r = call @llvm.copysign.nxv32f16( %vm, %vs) ret %r } define @vfcopysign_vf_nxv32f16( %vm, half %s) { -; CHECK-LABEL: vfcopysign_vf_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopysign_vf_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopysign_vf_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv32f16( %vm, %splat) @@ -517,22 +1136,74 @@ } define @vfcopynsign_vv_nxv32f16( %vm, %vs) { -; CHECK-LABEL: vfcopynsign_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfneg.v v24, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfneg.v v24, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v24, v24, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %n = fneg %vs %r = call @llvm.copysign.nxv32f16( %vm, %n) ret %r } define @vfcopynsign_vf_nxv32f16( %vm, half %s) { -; CHECK-LABEL: vfcopynsign_vf_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfcopynsign_vf_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfsgnjn.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfcopynsign_vf_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, 
e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfneg.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll @@ -1,25 +1,55 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN define @vfdiv_vv_nxv1f16( %va, %vb) { -; CHECK-LABEL: vfdiv_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %vc = fdiv %va, %vb ret %vc } define @vfdiv_vf_nxv1f16( %va, half %b) { -; CHECK-LABEL: vfdiv_vf_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv 
v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fdiv %va, %splat @@ -27,21 +57,47 @@ } define @vfdiv_vv_nxv2f16( %va, %vb) { -; CHECK-LABEL: vfdiv_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %vc = fdiv %va, %vb ret %vc } define @vfdiv_vf_nxv2f16( %va, half %b) { -; CHECK-LABEL: vfdiv_vf_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fdiv %va, %splat @@ -49,21 +105,47 @@ } define @vfdiv_vv_nxv4f16( %va, %vb) { -; CHECK-LABEL: vfdiv_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %vc = fdiv %va, %vb ret %vc } define @vfdiv_vf_nxv4f16( %va, half %b) { -; CHECK-LABEL: vfdiv_vf_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fdiv %va, %splat @@ -71,21 +153,47 @@ } define @vfdiv_vv_nxv8f16( %va, %vb) { -; CHECK-LABEL: vfdiv_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %vc = fdiv %va, %vb ret %vc } define @vfdiv_vf_nxv8f16( %va, half %b) { -; CHECK-LABEL: vfdiv_vf_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fdiv %va, %splat @@ -93,11 +201,26 @@ } define @vfdiv_fv_nxv8f16( %va, half %b) { -; CHECK-LABEL: vfdiv_fv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_fv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_fv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fdiv %splat, %va @@ -105,21 +228,47 @@ } define @vfdiv_vv_nxv16f16( %va, %vb) { -; CHECK-LABEL: vfdiv_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v12 
+; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %vc = fdiv %va, %vb ret %vc } define @vfdiv_vf_nxv16f16( %va, half %b) { -; CHECK-LABEL: vfdiv_vf_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fdiv %va, %splat @@ -127,21 +276,58 @@ } define @vfdiv_vv_nxv32f16( %va, %vb) { -; CHECK-LABEL: vfdiv_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v24, v0, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %vc = fdiv %va, %vb ret %vc } define @vfdiv_vf_nxv32f16( %va, half %b) { -; CHECK-LABEL: vfdiv_vf_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v0 +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fdiv %va, %splat diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -1,27 +1,53 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.fdiv.nxv1f16(, , , i32) define @vfdiv_vv_nxv1f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fdiv.nxv1f16( %va, %b, %m, i32 %evl) ret %v } define @vfdiv_vv_nxv1f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fdiv.nxv1f16( %va, %b, %m, i32 %evl) @@ -29,11 +55,26 @@ } define @vfdiv_vf_nxv1f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, 
fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fdiv.nxv1f16( %va, %vb, %m, i32 %evl) @@ -41,11 +82,26 @@ } define @vfdiv_vf_nxv1f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -57,21 +113,43 @@ declare @llvm.vp.fdiv.nxv2f16(, , , i32) define @vfdiv_vv_nxv2f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fdiv.nxv2f16( %va, %b, %m, i32 %evl) ret %v } define @vfdiv_vv_nxv2f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call 
@llvm.vp.fdiv.nxv2f16( %va, %b, %m, i32 %evl) @@ -79,11 +157,26 @@ } define @vfdiv_vf_nxv2f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fdiv.nxv2f16( %va, %vb, %m, i32 %evl) @@ -91,11 +184,26 @@ } define @vfdiv_vf_nxv2f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -107,21 +215,43 @@ declare @llvm.vp.fdiv.nxv4f16(, , , i32) define @vfdiv_vv_nxv4f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fdiv.nxv4f16( %va, %b, %m, i32 %evl) ret %v } define @vfdiv_vv_nxv4f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfdiv.vv 
v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fdiv.nxv4f16( %va, %b, %m, i32 %evl) @@ -129,11 +259,26 @@ } define @vfdiv_vf_nxv4f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fdiv.nxv4f16( %va, %vb, %m, i32 %evl) @@ -141,11 +286,26 @@ } define @vfdiv_vf_nxv4f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -157,21 +317,43 @@ declare @llvm.vp.fdiv.nxv8f16(, , , i32) define @vfdiv_vv_nxv8f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v10, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v10, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v12, v16, v12, 
v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fdiv.nxv8f16( %va, %b, %m, i32 %evl) ret %v } define @vfdiv_vv_nxv8f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fdiv.nxv8f16( %va, %b, %m, i32 %evl) @@ -179,11 +361,26 @@ } define @vfdiv_vf_nxv8f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fdiv.nxv8f16( %va, %vb, %m, i32 %evl) @@ -191,11 +388,26 @@ } define @vfdiv_vf_nxv8f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -207,21 +419,43 @@ declare @llvm.vp.fdiv.nxv16f16(, , , i32) define @vfdiv_vv_nxv16f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv16f16: -; CHECK: # %bb.0: 
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v12, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v12, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fdiv.nxv16f16( %va, %b, %m, i32 %evl) ret %v } define @vfdiv_vv_nxv16f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fdiv.nxv16f16( %va, %b, %m, i32 %evl) @@ -229,11 +463,26 @@ } define @vfdiv_vf_nxv16f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fdiv.nxv16f16( %va, %vb, %m, i32 %evl) @@ -241,11 +490,26 @@ } define @vfdiv_vf_nxv16f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -257,21 +521,89 @@ declare @llvm.vp.fdiv.nxv32f16(, , , i32) define @vfdiv_vv_nxv32f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v16, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v16, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a2, a1, 1 +; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB20_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: .LBB20_2: +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vmv4r.v v4, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: sub a2, a0, a2 +; ZVFHMIN-NEXT: sltu a0, a0, a2 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: srli a1, a1, 2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fdiv.nxv32f16( %va, %b, %m, i32 %evl) ret %v } define @vfdiv_vv_nxv32f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vv_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfdiv.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vv_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfdiv.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vv_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a2, a1, 1 +; ZVFHMIN-NEXT: vmset.m v0 +; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB21_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: .LBB21_2: +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vmv4r.v v4, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: sub a2, a0, a2 +; ZVFHMIN-NEXT: sltu a0, a0, a2 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: srli a1, a1, 2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fdiv.nxv32f16( %va, %b, %m, i32 %evl) @@ -279,11 +611,47 @@ } define @vfdiv_vf_nxv32f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v1, v0 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB22_2: +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fdiv.nxv32f16( %va, %vb, %m, i32 %evl) @@ -291,11 +659,48 @@ } define @vfdiv_vf_nxv32f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfdiv_vf_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfdiv_vf_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfdiv.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfdiv_vf_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v1 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v1, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: 
vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB23_2: +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN ; This tests a mix of vfmacc and vfmadd by using different operand orders to ; trigger commuting in TwoAddressInstructionPass. 
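; A minimal sketch of the commuting idea, using register numbers taken from
; the assertions below (everything else here is assumed for illustration and
; is not one of the generated checks). The fma intrinsic ties its result to
; one source register, and TwoAddressInstructionPass commutes operands to
; pick whichever tied form avoids a copy:
;
;   %vd = call <vscale x 1 x half> @llvm.fma.v1f16(%va, %vb, %vc)
;
; When %vd can reuse a multiplicand's register, llc can emit
;   vfmadd.vv v8, v9, v10   ; v8 = v8 * v9 + v10  (tied to a multiplicand)
; and when %vd can reuse the addend's register, it can emit
;   vfmacc.vv v8, v12, v10  ; v8 = v12 * v10 + v8 (tied to the addend)
; The calls below permute which IR value arrives in v8 to force both forms.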
@@ -10,21 +14,49 @@
 declare <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>)
 
 define <vscale x 1 x half> @vfmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) {
-; CHECK-LABEL: vfmadd_vv_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v9, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vv_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfmadd.vv v8, v9, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vv_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %vd = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc)
   ret <vscale x 1 x half> %vd
 }
 
 define <vscale x 1 x half> @vfmadd_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, half %c) {
-; CHECK-LABEL: vfmadd_vf_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfmadd.vf v8, fa0, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vf_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfmadd.vf v8, fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vf_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
   %vd = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat, <vscale x 1 x half> %vb)
@@ -34,21 +66,49 @@
 declare <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)
 
 define <vscale x 2 x half> @vfmadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x half> %vc) {
-; CHECK-LABEL: vfmadd_vv_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v10, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vv_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmadd.vv v8, v10, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vv_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %vd = call <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vc, <vscale x 2 x half> %vb)
   ret <vscale x 2 x half> %vd
 }
 
 define <vscale x 2 x half> @vfmadd_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, half %c) {
-; CHECK-LABEL: vfmadd_vf_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfmacc.vf v8, fa0, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vf_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmacc.vf v8, fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vf_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
   %vd = call <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half> %vb, <vscale x 2 x half> %splat, <vscale x 2 x half> %va)
@@ -58,21 +118,49 @@
 declare <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
 
 define <vscale x 4 x half> @vfmadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x half> %vc) {
-; CHECK-LABEL: vfmadd_vv_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v9, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vv_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmadd.vv v8, v9, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vv_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT: ret
   %vd = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> %vb, <vscale x 4 x half> %va, <vscale x 4 x half> %vc)
   ret <vscale x 4 x half> %vd
 }
 
 define <vscale x 4 x half> @vfmadd_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, half %c) {
-; CHECK-LABEL: vfmadd_vf_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmadd.vf v8, fa0, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vf_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmadd.vf v8, fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vf_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 4 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
   %vd = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat, <vscale x 4 x half> %vb)
@@ -82,21 +170,49 @@
 declare <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
 
 define <vscale x 8 x half> @vfmadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x half> %vc) {
-; CHECK-LABEL: vfmadd_vv_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfmacc.vv v8, v12, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vv_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfmacc.vv v8, v12, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vv_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: ret
   %vd = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %va)
   ret <vscale x 8 x half> %vd
 }
 
 define <vscale x 8 x half> @vfmadd_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, half %c) {
-; CHECK-LABEL: vfmadd_vf_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfmacc.vf v8, fa0, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vf_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfmacc.vf v8, fa0, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vf_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
   %vd = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %splat, <vscale x 8 x half> %va)
@@ -106,21 +222,64 @@
 declare <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x half>)
 
 define <vscale x 16 x half> @vfmadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x half> %vc) {
-; CHECK-LABEL: vfmadd_vv_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfmadd.vv v8, v16, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vv_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfmadd.vv v8, v16, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vv_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %vd = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> %vc, <vscale x 16 x half> %va, <vscale x 16 x half> %vb)
   ret <vscale x 16 x half> %vd
 }
 
 define <vscale x 16 x half> @vfmadd_vf_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, half %c) {
-; CHECK-LABEL: vfmadd_vf_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfmadd.vf v8, fa0, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vf_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfmadd.vf v8, fa0, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vf_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; ZVFHMIN-NEXT: vmv4r.v v28, v12
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl4r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 16 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
   %vd = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %splat, <vscale x 16 x half> %vb)
@@ -130,22 +289,151 @@
 declare <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x half>)
 
 define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %vc) {
-; CHECK-LABEL: vfmadd_vv_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vl8re16.v v24, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmacc.vv v8, v16, v24
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vv_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vl8re16.v v24, (a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfmacc.vv v8, v16, v24
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vv_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 5
+; ZVFHMIN-NEXT: sub sp, sp, a1
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT: vmv8r.v v0, v16
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv8r.v v16, v8
+; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmadd.vv v0, v8, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v0
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 5
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
   %vd = call <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half> %vc, <vscale x 32 x half> %vb, <vscale x 32 x half> %va)
   ret <vscale x 32 x half> %vd
 }
 
 define <vscale x 32 x half> @vfmadd_vf_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, half %c) {
-; CHECK-LABEL: vfmadd_vf_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmacc.vf v8, fa0, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmadd_vf_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfmacc.vf v8, fa0, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vf_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v24, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmadd.vv v0, v8, v24
+; ZVFHMIN-NEXT: vmv8r.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v0
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 32 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
   %vd = call <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half> %vb, <vscale x 32 x half> %splat, <vscale x 32 x half> %va)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
@@ -1,27 +1,57 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
 declare <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
 
 define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
-; CHECK-LABEL: vfmax_nxv1f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv1f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv1f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
 }
 
 define <vscale x 1 x half> @vfmax_nxv1f16_vf(<vscale x 1 x half> %a, half %b) {
-; CHECK-LABEL: vfmax_nxv1f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfmax.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv1f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfmax.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv1f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %splat)
@@ -31,21 +61,47 @@
 declare <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
 
 define <vscale x 2 x half> @vfmax_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
-; CHECK-LABEL: vfmax_nxv2f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv2f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv2f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %v
 }
 
 define <vscale x 2 x half> @vfmax_nxv2f16_vf(<vscale x 2 x half> %a, half %b) {
-; CHECK-LABEL: vfmax_nxv2f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfmax.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv2f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmax.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv2f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
@@ -55,21 +111,47 @@
 declare <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
 
 define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
-; CHECK-LABEL: vfmax_nxv4f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv4f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv4f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %v
 }
 
 define <vscale x 4 x half> @vfmax_nxv4f16_vf(<vscale x 4 x half> %a, half %b) {
-; CHECK-LABEL: vfmax_nxv4f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmax.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv4f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmax.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv4f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v10, v10, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
@@ -79,21 +161,47 @@
 declare <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
 
 define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
-; CHECK-LABEL: vfmax_nxv8f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv8f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv8f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %v
 }
 
 define <vscale x 8 x half> @vfmax_nxv8f16_vf(<vscale x 8 x half> %a, half %b) {
-; CHECK-LABEL: vfmax_nxv8f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfmax.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv8f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfmax.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv8f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v12, v12, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
@@ -103,21 +211,47 @@
 declare <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
 
 define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
-; CHECK-LABEL: vfmax_nxv16f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv16f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv16f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
   ret <vscale x 16 x half> %v
 }
 
 define <vscale x 16 x half> @vfmax_nxv16f16_vf(<vscale x 16 x half> %a, half %b) {
-; CHECK-LABEL: vfmax_nxv16f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfmax.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv16f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfmax.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv16f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %splat)
@@ -127,21 +261,58 @@
 declare <vscale x 32 x half> @llvm.maxnum.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>)
 
 define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale x 32 x half> %b) {
-; CHECK-LABEL: vfmax_nxv32f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv32f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv32f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v24, v0, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 32 x half> @llvm.maxnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
   ret <vscale x 32 x half> %v
 }
 
 define <vscale x 32 x half> @vfmax_nxv32f16_vf(<vscale x 32 x half> %a, half %b) {
-; CHECK-LABEL: vfmax_nxv32f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmax.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv32f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfmax.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv32f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v16, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v16, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x half> @llvm.maxnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %splat)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
@@ -1,27 +1,53 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
 declare <vscale x 1 x half> @llvm.vp.maxnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v9, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 1 x half> @llvm.vp.maxnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x half> %v
 }
 
 define <vscale x 1 x half> @vfmax_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv1f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv1f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv1f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x half> @llvm.vp.maxnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
@@ -31,21 +57,43 @@
 declare <vscale x 2 x half> @llvm.vp.maxnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x half> @vfmax_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v9, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 2 x half> @llvm.vp.maxnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x half> %v
 }
 
 define <vscale x 2 x half> @vfmax_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv2f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv2f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv2f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x half> @llvm.vp.maxnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
@@ -55,21 +103,43 @@
 declare <vscale x 4 x half> @llvm.vp.maxnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x i1>, i32)
 
 define <vscale x 4 x half> @vfmax_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v9, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 4 x half> @llvm.vp.maxnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
   ret <vscale x 4 x half> %v
 }
 
 define <vscale x 4 x half> @vfmax_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv4f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv4f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv4f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x half> @llvm.vp.maxnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
@@ -79,21 +149,43 @@
 declare <vscale x 8 x half> @llvm.vp.maxnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 8 x half> @llvm.vp.maxnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x half> %v
 }
 
 define <vscale x 8 x half> @vfmax_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv8f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv8f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv8f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x half> @llvm.vp.maxnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
@@ -103,21 +195,43 @@
 declare <vscale x 16 x half> @llvm.vp.maxnum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v12, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 16 x half> @llvm.vp.maxnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
 }
 
 define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv16f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv16f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv16f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x half> @llvm.vp.maxnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
@@ -127,21 +241,89 @@
 declare <vscale x 32 x half> @llvm.vp.maxnum.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x i1>, i32)
 
 define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v16, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a2, a1, 1
+; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB10_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vmv4r.v v4, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: sub a2, a0, a2
+; ZVFHMIN-NEXT: sltu a0, a0, a2
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: and a0, a0, a2
+; ZVFHMIN-NEXT: srli a1, a1, 2
+; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 32 x half> @llvm.vp.maxnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x half> %v
 }
 
 define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfmax_vv_nxv32f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv32f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv32f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a2, a1, 1
+; ZVFHMIN-NEXT: vmset.m v0
+; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB11_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vmv4r.v v4, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: sub a2, a0, a2
+; ZVFHMIN-NEXT: sltu a0, a0, a2
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: and a0, a0, a2
+; ZVFHMIN-NEXT: srli a1, a1, 2
+; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x half> @llvm.vp.maxnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
@@ -1,27 +1,57 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
 declare <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
 
 define <vscale x 1 x half> @vfmin_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
-; CHECK-LABEL: vfmin_nxv1f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv1f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv1f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
   ret <vscale x 1 x half> %v
 }
 
 define <vscale x 1 x half> @vfmin_nxv1f16_vf(<vscale x 1 x half> %a, half %b) {
-; CHECK-LABEL: vfmin_nxv1f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfmin.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv1f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfmin.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv1f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %splat)
@@ -31,21 +61,47 @@
 declare <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
 
 define <vscale x 2 x half> @vfmin_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
-; CHECK-LABEL: vfmin_nxv2f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv2f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv2f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
   ret <vscale x 2 x half> %v
 }
 
 define <vscale x 2 x half> @vfmin_nxv2f16_vf(<vscale x 2 x half> %a, half %b) {
-; CHECK-LABEL: vfmin_nxv2f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfmin.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv2f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmin.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv2f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 2 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
@@ -55,21 +111,47 @@
 declare <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
 
 define <vscale x 4 x half> @vfmin_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
-; CHECK-LABEL: vfmin_nxv4f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv4f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv4f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
   ret <vscale x 4 x half> %v
 }
 
 define <vscale x 4 x half> @vfmin_nxv4f16_vf(<vscale x 4 x half> %a, half %b) {
-; CHECK-LABEL: vfmin_nxv4f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmin.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv4f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmin.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv4f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v10, v10, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 4 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
@@ -79,21 +161,47 @@
 declare <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
 
 define <vscale x 8 x half> @vfmin_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
-; CHECK-LABEL: vfmin_nxv8f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv8f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv8f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %v
 }
 
 define <vscale x 8 x half> @vfmin_nxv8f16_vf(<vscale x 8 x half> %a, half %b) {
-; CHECK-LABEL: vfmin_nxv8f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfmin.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv8f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfmin.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv8f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v12, v12, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
@@ -103,21 +211,47 @@
 declare <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
 
 define <vscale x 16 x half> @vfmin_nxv16f16_vv(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
-; CHECK-LABEL: vfmin_nxv16f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv16f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv16f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
   ret <vscale x 16 x half> %v
 }
 
 define <vscale x 16 x half> @vfmin_nxv16f16_vf(<vscale x 16 x half> %a, half %b) {
-; CHECK-LABEL: vfmin_nxv16f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfmin.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv16f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfmin.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv16f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %splat)
@@ -127,21 +261,58 @@
 declare <vscale x 32 x half> @llvm.minnum.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>)
 
 define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale x 32 x half> %b) {
-; CHECK-LABEL: vfmin_nxv32f16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv32f16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv32f16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v24, v0, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 32 x half> @llvm.minnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
   ret <vscale x 32 x half> %v
 }
 
 define <vscale x 32 x half> @vfmin_nxv32f16_vf(<vscale x 32 x half> %a, half %b) {
-; CHECK-LABEL: vfmin_nxv32f16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmin.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv32f16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfmin.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv32f16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v16, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v16, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x half> @llvm.minnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %splat)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
@@ -1,27 +1,53 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.minnum.nxv1f16(, , , i32) define @vfmin_vv_nxv1f16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmin_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfmin.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.minnum.nxv1f16( %va, %vb, %m, i32 %evl) ret %v } define @vfmin_vv_nxv1f16_unmasked( %va, %vb, i32 zeroext %evl) { -; CHECK-LABEL: vfmin_vv_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_vv_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfmin.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.minnum.nxv1f16( %va, %vb, %m, i32 %evl) @@ -31,21 +57,43 @@ declare @llvm.vp.minnum.nxv2f16(, , , i32) define @vfmin_vv_nxv2f16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmin_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfmin.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.minnum.nxv2f16( %va, %vb, %m, i32 %evl) ret %v } define @vfmin_vv_nxv2f16_unmasked( %va, %vb, i32 zeroext %evl) { -; CHECK-LABEL: vfmin_vv_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_vv_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfmin.vv v8, v8, v9 +; 
@@ -31,21 +57,43 @@
declare <vscale x 2 x half> @llvm.vp.minnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vfmin_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmin_vv_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v9, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %v = call <vscale x 2 x half> @llvm.vp.minnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
 ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vfmin_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfmin_vv_nxv2f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv2f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv2f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 %v = call <vscale x 2 x half> @llvm.vp.minnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
@@ -55,21 +103,43 @@
declare <vscale x 4 x half> @llvm.vp.minnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vfmin_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmin_vv_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v9, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
 %v = call <vscale x 4 x half> @llvm.vp.minnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
 ret <vscale x 4 x half> %v
}

define <vscale x 4 x half> @vfmin_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfmin_vv_nxv4f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv4f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv4f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 %v = call <vscale x 4 x half> @llvm.vp.minnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
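Also visible across these hunks: every widening doubles the register-group size (mf4 becomes mf2, mf2 becomes m1, m1 becomes m2, and so on), so the ZVFHMIN paths need twice the vector registers for their f32 temporaries. The type change behind that, as a one-line sketch:

  %w = fpext <vscale x 4 x half> %va to <vscale x 4 x float>  ; one m1 group in, one m2 group out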
@@ -79,21 +149,43 @@
declare <vscale x 8 x half> @llvm.vp.minnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmin_vv_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
 %v = call <vscale x 8 x half> @llvm.vp.minnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
 ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vfmin_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfmin_vv_nxv8f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv8f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv8f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 %v = call <vscale x 8 x half> @llvm.vp.minnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
@@ -103,21 +195,43 @@
declare <vscale x 16 x half> @llvm.vp.minnum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x i1>, i32)

define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmin_vv_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v12, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
 %v = call <vscale x 16 x half> @llvm.vp.minnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x half> %v
}

define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfmin_vv_nxv16f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv16f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv16f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 %v = call <vscale x 16 x half> @llvm.vp.minnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
@@ -127,21 +241,89 @@
declare <vscale x 32 x half> @llvm.vp.minnum.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x i1>, i32)

define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmin_vv_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v16, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a2, a1, 1
+; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB10_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vmv4r.v v4, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: sub a2, a0, a2
+; ZVFHMIN-NEXT: sltu a0, a0, a2
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: and a0, a0, a2
+; ZVFHMIN-NEXT: srli a1, a1, 2
+; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
 %v = call <vscale x 32 x half> @llvm.vp.minnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
 ret <vscale x 32 x half> %v
}

define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, i32 zeroext %evl) {
-; CHECK-LABEL: vfmin_vv_nxv32f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv32f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv32f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a2, a1, 1
+; ZVFHMIN-NEXT: vmset.m v0
+; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB11_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vmv4r.v v4, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: sub a2, a0, a2
+; ZVFHMIN-NEXT: sltu a0, a0, a2
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: and a0, a0, a2
+; ZVFHMIN-NEXT: srli a1, a1, 2
+; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 %v = call <vscale x 32 x half> @llvm.vp.minnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
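Between the two vfmin files, a note on the nxv32f16 VP cases above: widening nxv32f16 would need an nxv32f32 value (LMUL 16), which does not exist, so the lowering processes the vector as two nxv16f16 halves. The scalar code before each half derives that half's AVL from the requested EVL, and vslidedown.vx shifts the upper part of the mask into v0 for the second half. A rough IR rendition of the AVL split (sketch only; @evl_split is a made-up name, and the asm computes the high half branch-free with the sub/sltu/addi/and sequence):

define { i32, i32 } @evl_split(i32 %evl, i32 %vlenb) {
  ; 2 * vlenb is the element count of one nxv16f16 half.
  %half = shl i32 %vlenb, 1
  ; The first half runs on min(evl, half) elements ...
  %lo = call i32 @llvm.umin.i32(i32 %evl, i32 %half)
  ; ... and the second half on max(evl - half, 0).
  %rem = sub i32 %evl, %half
  %small = icmp ult i32 %evl, %half
  %hi = select i1 %small, i32 0, i32 %rem
  %r0 = insertvalue { i32, i32 } poison, i32 %lo, 0
  %r1 = insertvalue { i32, i32 } %r0, i32 %hi, 1
  ret { i32, i32 } %r1
}
declare i32 @llvm.umin.i32(i32, i32)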
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
@@ -1,25 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN define @vfmul_vv_nxv1f16( %va, %vb) { -; CHECK-LABEL: vfmul_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %vc = fmul %va, %vb ret %vc } define @vfmul_vf_nxv1f16( %va, half %b) { -; CHECK-LABEL: vfmul_vf_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fmul %va, %splat @@ -27,21 +57,47 @@ } define @vfmul_vv_nxv2f16( %va, %vb) { -; CHECK-LABEL: vfmul_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %vc = fmul %va, %vb ret %vc } define @vfmul_vf_nxv2f16( %va, half %b) { -; CHECK-LABEL: vfmul_vf_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: 
ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fmul %va, %splat @@ -49,21 +105,47 @@ } define @vfmul_vv_nxv4f16( %va, %vb) { -; CHECK-LABEL: vfmul_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %vc = fmul %va, %vb ret %vc } define @vfmul_vf_nxv4f16( %va, half %b) { -; CHECK-LABEL: vfmul_vf_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fmul %va, %splat @@ -71,21 +153,47 @@ } define @vfmul_vv_nxv8f16( %va, %vb) { -; CHECK-LABEL: vfmul_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %vc = fmul %va, %vb ret %vc } define @vfmul_vf_nxv8f16( %va, half %b) { -; CHECK-LABEL: vfmul_vf_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli 
zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fmul %va, %splat @@ -93,11 +201,26 @@ } define @vfmul_fv_nxv8f16( %va, half %b) { -; CHECK-LABEL: vfmul_fv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_fv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_fv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fmul %splat, %va @@ -105,21 +228,47 @@ } define @vfmul_vv_nxv16f16( %va, %vb) { -; CHECK-LABEL: vfmul_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v24, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %vc = fmul %va, %vb ret %vc } define @vfmul_vf_nxv16f16( %va, half %b) { -; CHECK-LABEL: vfmul_vf_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fmul %va, %splat @@ -127,21 +276,58 @@ } define @vfmul_vv_nxv32f16( %va, %vb) { -; CHECK-LABEL: vfmul_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, 
zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v24, v0, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %vc = fmul %va, %vb ret %vc } define @vfmul_vf_nxv32f16( %va, half %b) { -; CHECK-LABEL: vfmul_vf_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fmul %va, %splat diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll @@ -1,27 +1,53 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.fmul.nxv1f16(, , , i32) define @vfmul_vv_nxv1f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10, 
v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fmul.nxv1f16( %va, %b, %m, i32 %evl) ret %v } define @vfmul_vv_nxv1f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv1f16( %va, %b, %m, i32 %evl) @@ -29,11 +55,26 @@ } define @vfmul_vf_nxv1f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv1f16( %va, %vb, %m, i32 %evl) @@ -41,11 +82,26 @@ } define @vfmul_vf_nxv1f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -57,21 +113,43 @@ declare @llvm.vp.fmul.nxv2f16(, , , i32) define @vfmul_vv_nxv2f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: 
vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fmul.nxv2f16( %va, %b, %m, i32 %evl) ret %v } define @vfmul_vv_nxv2f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv2f16( %va, %b, %m, i32 %evl) @@ -79,11 +157,26 @@ } define @vfmul_vf_nxv2f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv2f16( %va, %vb, %m, i32 %evl) @@ -91,11 +184,26 @@ } define @vfmul_vf_nxv2f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; 
ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -107,21 +215,43 @@ declare @llvm.vp.fmul.nxv4f16(, , , i32) define @vfmul_vv_nxv4f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fmul.nxv4f16( %va, %b, %m, i32 %evl) ret %v } define @vfmul_vv_nxv4f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv4f16( %va, %b, %m, i32 %evl) @@ -129,11 +259,26 @@ } define @vfmul_vf_nxv4f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv4f16( %va, %vb, %m, i32 %evl) @@ -141,11 +286,26 @@ } define @vfmul_vf_nxv4f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; 
CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -157,21 +317,43 @@ declare @llvm.vp.fmul.nxv8f16(, , , i32) define @vfmul_vv_nxv8f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v10, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v10, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fmul.nxv8f16( %va, %b, %m, i32 %evl) ret %v } define @vfmul_vv_nxv8f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv8f16( %va, %b, %m, i32 %evl) @@ -179,11 +361,26 @@ } define @vfmul_vf_nxv8f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: 
vfmul.vv v12, v12, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv8f16( %va, %vb, %m, i32 %evl) @@ -191,11 +388,26 @@ } define @vfmul_vf_nxv8f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -207,21 +419,43 @@ declare @llvm.vp.fmul.nxv16f16(, , , i32) define @vfmul_vv_nxv16f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v12, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v12, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fmul.nxv16f16( %va, %b, %m, i32 %evl) ret %v } define @vfmul_vv_nxv16f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v24, v16 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv16f16( %va, %b, %m, i32 %evl) @@ -229,11 +463,26 @@ } define @vfmul_vf_nxv16f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: 
vfmul_vf_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv16f16( %va, %vb, %m, i32 %evl) @@ -241,11 +490,26 @@ } define @vfmul_vf_nxv16f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -257,21 +521,89 @@ declare @llvm.vp.fmul.nxv32f16(, , , i32) define @vfmul_vv_nxv32f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v16, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v16, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a2, a1, 1 +; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB20_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: .LBB20_2: +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vmv4r.v v4, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: sub a2, a0, a2 +; ZVFHMIN-NEXT: sltu a0, a0, a2 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: srli a1, a1, 2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v24, v16, v0.t +; 
ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fmul.nxv32f16( %va, %b, %m, i32 %evl) ret %v } define @vfmul_vv_nxv32f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vv_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmul.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vv_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfmul.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vv_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a2, a1, 1 +; ZVFHMIN-NEXT: vmset.m v0 +; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: bltu a0, a2, .LBB21_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: .LBB21_2: +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vmv4r.v v4, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: sub a2, a0, a2 +; ZVFHMIN-NEXT: sltu a0, a0, a2 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: srli a1, a1, 2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv32f16( %va, %b, %m, i32 %evl) @@ -279,11 +611,47 @@ } define @vfmul_vf_nxv32f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v1, v0 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB22_2: +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, 
a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fmul.nxv32f16( %va, %vb, %m, i32 %evl) @@ -291,11 +659,48 @@ } define @vfmul_vf_nxv32f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfmul_vf_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmul_vf_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfmul.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmul_vf_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v1 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v1, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB23_2: +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll @@ -1,65 +1,134 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN define @vfneg_vv_nxv1f16( %va) { -; CHECK-LABEL: vfneg_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: 
vfneg_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v9, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %vb = fneg %va ret %vb } define @vfneg_vv_nxv2f16( %va) { -; CHECK-LABEL: vfneg_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfneg_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfneg.v v9, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %vb = fneg %va ret %vb } define @vfneg_vv_nxv4f16( %va) { -; CHECK-LABEL: vfneg_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfneg_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %vb = fneg %va ret %vb } define @vfneg_vv_nxv8f16( %va) { -; CHECK-LABEL: vfneg_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfneg_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfneg.v v12, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %vb = fneg %va ret %vb } define @vfneg_vv_nxv16f16( %va) { -; CHECK-LABEL: vfneg_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfneg_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfneg.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %vb = fneg %va ret %vb } define @vfneg_vv_nxv32f16( %va) { -; CHECK-LABEL: vfneg_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfneg_vv_nxv32f16: +; ZVFH: # 
%bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfneg.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfneg.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %vb = fneg %va ret %vb } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll @@ -1,27 +1,51 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.fneg.nxv1f16(, , i32) define @vfneg_vv_nxv1f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfneg_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfneg.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfneg_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfneg.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl) ret %v } define @vfneg_vv_nxv1f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfneg_vv_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfneg_vv_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfneg.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfneg.v v9, v9 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl) @@ -31,21 +55,41 @@ declare @llvm.vp.fneg.nxv2f16(, , i32) define @vfneg_vv_nxv2f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfneg_vv_nxv2f16: -; CHECK: # 
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfneg_vv_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v9, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
 ret <vscale x 2 x half> %v
 }

 define <vscale x 2 x half> @vfneg_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vfneg_vv_nxv2f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfneg_vv_nxv2f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv2f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v9, v9
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 %v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
@@ -55,21 +99,41 @@
 declare <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

 define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfneg_vv_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfneg_vv_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v10, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
 %v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
 ret <vscale x 4 x half> %v
 }

 define <vscale x 4 x half> @vfneg_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vfneg_vv_nxv4f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfneg_vv_nxv4f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv4f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v10, v10
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 %v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
@@ -79,21 +143,41 @@
 declare <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

 define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfneg_vv_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfneg_vv_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v12, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
 %v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
 ret <vscale x 8 x half> %v
 }

 define <vscale x 8 x half> @vfneg_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vfneg_vv_nxv8f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfneg_vv_nxv8f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv8f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v12, v12
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 %v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
@@ -103,21 +187,41 @@
 declare <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)

 define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfneg_vv_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfneg_vv_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v16, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
 %v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x half> %v
 }

 define <vscale x 16 x half> @vfneg_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vfneg_vv_nxv16f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfneg_vv_nxv16f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv16f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v16, v16
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 %v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
@@ -127,21 +231,82 @@
 declare <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)

 define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfneg_vv_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfneg_vv_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vmv1r.v v16, v0
+; ZVFHMIN-NEXT: csrr a2, vlenb
+; ZVFHMIN-NEXT: slli a1, a2, 1
+; ZVFHMIN-NEXT: sub a3, a0, a1
+; ZVFHMIN-NEXT: sltu a4, a0, a3
+; ZVFHMIN-NEXT: addi a4, a4, -1
+; ZVFHMIN-NEXT: and a3, a4, a3
+; ZVFHMIN-NEXT: srli a2, a2, 2
+; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a0, a1
+; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v0, v16
+; ZVFHMIN-NEXT: vfneg.v v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
 %v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
 ret <vscale x 32 x half> %v
 }

 define <vscale x 32 x half> @vfneg_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vfneg_vv_nxv32f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfneg_vv_nxv32f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv32f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; ZVFHMIN-NEXT: vmset.m v16
+; ZVFHMIN-NEXT: csrr a2, vlenb
+; ZVFHMIN-NEXT: slli a1, a2, 1
+; ZVFHMIN-NEXT: sub a3, a0, a1
+; ZVFHMIN-NEXT: sltu a4, a0, a3
+; ZVFHMIN-NEXT: addi a4, a4, -1
+; ZVFHMIN-NEXT: and a3, a4, a3
+; ZVFHMIN-NEXT: srli a2, a2, 2
+; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfneg.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a0, a1
+; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v0, v16
+; ZVFHMIN-NEXT: vfneg.v v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
 %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 %v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
@@ -1,17 +1,31 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 
-mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.sqrt.nxv1f16() define @vfsqrt_nxv1f16( %v) { -; CHECK-LABEL: vfsqrt_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v9, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %r = call @llvm.sqrt.nxv1f16( %v) ret %r } @@ -19,11 +33,21 @@ declare @llvm.sqrt.nxv2f16() define @vfsqrt_nxv2f16( %v) { -; CHECK-LABEL: vfsqrt_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v9, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %r = call @llvm.sqrt.nxv2f16( %v) ret %r } @@ -31,11 +55,21 @@ declare @llvm.sqrt.nxv4f16() define @vfsqrt_nxv4f16( %v) { -; CHECK-LABEL: vfsqrt_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %r = call @llvm.sqrt.nxv4f16( %v) ret %r } @@ -43,11 +77,21 @@ declare @llvm.sqrt.nxv8f16() define @vfsqrt_nxv8f16( %v) { -; CHECK-LABEL: vfsqrt_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v12, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %r = call @llvm.sqrt.nxv8f16( %v) ret %r } @@ -55,11 +99,21 @@ declare @llvm.sqrt.nxv16f16() define @vfsqrt_nxv16f16( %v) { -; CHECK-LABEL: vfsqrt_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; 
CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %r = call @llvm.sqrt.nxv16f16( %v) ret %r } @@ -67,11 +121,26 @@ declare @llvm.sqrt.nxv32f16() define @vfsqrt_nxv32f16( %v) { -; CHECK-LABEL: vfsqrt_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v16, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %r = call @llvm.sqrt.nxv32f16( %v) ret %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll @@ -1,27 +1,51 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.sqrt.nxv1f16(, , i32) define @vfsqrt_vv_nxv1f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv1f16( %va, %m, i32 %evl) ret %v } define @vfsqrt_vv_nxv1f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; 
CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v9, v9 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.sqrt.nxv1f16( %va, %m, i32 %evl) @@ -31,21 +55,41 @@ declare @llvm.vp.sqrt.nxv2f16(, , i32) define @vfsqrt_vv_nxv2f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v9, v9, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv2f16( %va, %m, i32 %evl) ret %v } define @vfsqrt_vv_nxv2f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v9, v9 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.sqrt.nxv2f16( %va, %m, i32 %evl) @@ -55,21 +99,41 @@ declare @llvm.vp.sqrt.nxv4f16(, , i32) define @vfsqrt_vv_nxv4f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v10, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv4f16( %va, %m, i32 %evl) ret %v } define @vfsqrt_vv_nxv4f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv4f16_unmasked: +; ZVFH: # %bb.0: 
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v10, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.sqrt.nxv4f16( %va, %m, i32 %evl) @@ -79,21 +143,41 @@ declare @llvm.vp.sqrt.nxv8f16(, , i32) define @vfsqrt_vv_nxv8f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v12, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv8f16( %va, %m, i32 %evl) ret %v } define @vfsqrt_vv_nxv8f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v12, v12 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.sqrt.nxv8f16( %va, %m, i32 %evl) @@ -103,21 +187,41 @@ declare @llvm.vp.sqrt.nxv16f16(, , i32) define @vfsqrt_vv_nxv16f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v16, v16, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv16f16( %va, %m, i32 %evl) ret %v } define @vfsqrt_vv_nxv16f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv16f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv16f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret 
+; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv16f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v16, v16 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.sqrt.nxv16f16( %va, %m, i32 %evl) @@ -127,21 +231,82 @@ declare @llvm.vp.sqrt.nxv32f16(, , i32) define @vfsqrt_vv_nxv32f16( %va, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vmv1r.v v16, v0 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB10_2: +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfsqrt.v v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv32f16( %va, %m, i32 %evl) ret %v } define @vfsqrt_vv_nxv32f16_unmasked( %va, i32 zeroext %evl) { -; CHECK-LABEL: vfsqrt_vv_nxv32f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsqrt_vv_nxv32f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfsqrt.v v8, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: sltu a4, a0, a3 +; ZVFHMIN-NEXT: addi a4, a4, -1 +; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsqrt.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB11_2: +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vfsqrt.v v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: 
vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.sqrt.nxv32f16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll @@ -1,25 +1,55 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN define @vfsub_vv_nxv1f16( %va, %vb) { -; CHECK-LABEL: vfsub_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %vc = fsub %va, %vb ret %vc } define @vfsub_vf_nxv1f16( %va, half %b) { -; CHECK-LABEL: vfsub_vf_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fsub %va, %splat @@ -27,21 +57,47 @@ } define @vfsub_vv_nxv2f16( %va, %vb) { -; CHECK-LABEL: vfsub_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v 
v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %vc = fsub %va, %vb ret %vc } define @vfsub_vf_nxv2f16( %va, half %b) { -; CHECK-LABEL: vfsub_vf_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fsub %va, %splat @@ -49,21 +105,47 @@ } define @vfsub_vv_nxv4f16( %va, %vb) { -; CHECK-LABEL: vfsub_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %vc = fsub %va, %vb ret %vc } define @vfsub_vf_nxv4f16( %va, half %b) { -; CHECK-LABEL: vfsub_vf_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fsub %va, %splat @@ -71,21 +153,47 @@ } define @vfsub_vv_nxv8f16( %va, %vb) { -; CHECK-LABEL: vfsub_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, 
ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %vc = fsub %va, %vb ret %vc } define @vfsub_vf_nxv8f16( %va, half %b) { -; CHECK-LABEL: vfsub_vf_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fsub %va, %splat @@ -93,11 +201,26 @@ } define @vfsub_fv_nxv8f16( %va, half %b) { -; CHECK-LABEL: vfsub_fv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_fv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vfrsub.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_fv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fsub %splat, %va @@ -105,21 +228,47 @@ } define @vfsub_vv_nxv16f16( %va, %vb) { -; CHECK-LABEL: vfsub_vv_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %vc = fsub %va, %vb ret %vc } define @vfsub_vf_nxv16f16( %va, half %b) { -; CHECK-LABEL: vfsub_vf_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; 
+; ZVFHMIN-LABEL: vfsub_vf_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fsub %va, %splat @@ -127,21 +276,58 @@ } define @vfsub_vv_nxv32f16( %va, %vb) { -; CHECK-LABEL: vfsub_vv_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v24, v0, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %vc = fsub %va, %vb ret %vc } define @vfsub_vf_nxv32f16( %va, half %b) { -; CHECK-LABEL: vfsub_vf_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v16, v16, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v16, v16, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = fsub %va, %splat diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -1,27 +1,53 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: 
-verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.vp.fsub.nxv1f16(, , , i32) define @vfsub_vv_nxv1f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vv_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fsub.nxv1f16( %va, %b, %m, i32 %evl) ret %v } define @vfsub_vv_nxv1f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vv_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fsub.nxv1f16( %va, %b, %m, i32 %evl) @@ -29,11 +55,26 @@ } define @vfsub_vf_nxv1f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vf_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fsub.nxv1f16( %va, %vb, %m, i32 %evl) @@ -41,11 +82,26 @@ } define @vfsub_vf_nxv1f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vf_nxv1f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: 
vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv1f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_nxv1f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -57,21 +113,43 @@ declare @llvm.vp.fsub.nxv2f16(, , , i32) define @vfsub_vv_nxv2f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vv_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fsub.nxv2f16( %va, %b, %m, i32 %evl) ret %v } define @vfsub_vv_nxv2f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vv_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fsub.nxv2f16( %va, %b, %m, i32 %evl) @@ -79,11 +157,26 @@ } define @vfsub_vf_nxv2f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vf_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; 
ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fsub.nxv2f16( %va, %vb, %m, i32 %evl) @@ -91,11 +184,26 @@ } define @vfsub_vf_nxv2f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vf_nxv2f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv2f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_nxv2f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v9, v9, v8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -107,21 +215,43 @@ declare @llvm.vp.fsub.nxv4f16(, , , i32) define @vfsub_vv_nxv4f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vv_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v9, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v10, v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fsub.nxv4f16( %va, %b, %m, i32 %evl) ret %v } define @vfsub_vv_nxv4f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vv_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v10, v12, v10 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer %v = call @llvm.vp.fsub.nxv4f16( %va, %b, %m, i32 %evl) @@ -129,11 +259,26 @@ } define @vfsub_vf_nxv4f16( %va, half %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vf_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t 
-; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.fsub.nxv4f16( %va, %vb, %m, i32 %evl) @@ -141,11 +286,26 @@ } define @vfsub_vf_nxv4f16_unmasked( %va, half %b, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vf_nxv4f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vf_nxv4f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfsub.vf v8, v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vf_nxv4f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %head = insertelement poison, i1 true, i32 0 @@ -157,21 +317,43 @@ declare @llvm.vp.fsub.nxv8f16(, , , i32) define @vfsub_vv_nxv8f16( %va, %b, %m, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vv_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v10, v0.t -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v10, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fsub.nxv8f16( %va, %b, %m, i32 %evl) ret %v } define @vfsub_vv_nxv8f16_unmasked( %va, %b, i32 zeroext %evl) { -; CHECK-LABEL: vfsub_vv_nxv8f16_unmasked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfsub.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfsub_vv_nxv8f16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfsub.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsub_vv_nxv8f16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, 
ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
@@ -179,11 +361,26 @@
 }
 
 define <vscale x 8 x half> @vfsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfsub_vf_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsub_vf_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsub_vf_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
@@ -191,11 +388,26 @@
 }
 
 define <vscale x 8 x half> @vfsub_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, half %b, i32 zeroext %evl) {
-; CHECK-LABEL: vfsub_vf_nxv8f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vfsub.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsub_vf_nxv8f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vfsub.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsub_vf_nxv8f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
   %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
@@ -207,21 +419,43 @@
 declare <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x i1>, i32)
 
 define <vscale x 16 x half> @vfsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfsub_vv_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v8, v12, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsub_vv_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfsub.vv v8, v8, v12, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsub_vv_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x half> %v
 }
 
 define <vscale x 16 x half> @vfsub_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, <vscale x 16 x half> %b, i32 zeroext %evl) {
-; CHECK-LABEL: vfsub_vv_nxv16f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsub_vv_nxv16f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfsub.vv v8, v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsub_vv_nxv16f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
@@ -229,11 +463,26 @@
 }
 
 define <vscale x 16 x half> @vfsub_vf_nxv16f16(<vscale x 16 x half> %va, half %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfsub_vf_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsub_vf_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsub_vf_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
   %v = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
@@ -241,11 +490,26 @@
 }
 
 define <vscale x 16 x half> @vfsub_vf_nxv16f16_unmasked(<vscale x 16 x half> %va, half %b, i32 zeroext %evl) {
-; CHECK-LABEL: vfsub_vf_nxv16f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfsub.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsub_vf_nxv16f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfsub.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsub_vf_nxv16f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
@@ -257,21 +521,89 @@
 declare <vscale x 32 x half> @llvm.vp.fsub.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x i1>, i32)
 
 define <vscale x 32 x half> @vfsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfsub_vv_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v8, v16, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsub_vv_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vfsub.vv v8, v8, v16, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsub_vv_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a2, a1, 1
+; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB20_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: .LBB20_2:
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vmv4r.v v4, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: sub a2, a0, a2
+; ZVFHMIN-NEXT: sltu a0, a0, a2
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: and a0, a0, a2
+; ZVFHMIN-NEXT: srli a1, a1, 2
+; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
   %v = call <vscale x 32 x half> @llvm.vp.fsub.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
   ret <vscale x 32 x half> %v
 }
 
 define <vscale x 32 x half> @vfsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %b, i32 zeroext %evl) {
-; CHECK-LABEL: vfsub_vv_nxv32f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfsub.vv v8, v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsub_vv_nxv32f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vfsub.vv v8, v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsub_vv_nxv32f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a2, a1, 1
+; ZVFHMIN-NEXT: vmset.m v0
+; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB21_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: .LBB21_2:
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vmv4r.v v4, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: sub a2, a0, a2
+; ZVFHMIN-NEXT: sltu a0, a0, a2
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: and a0, a0, a2
+; ZVFHMIN-NEXT: srli a1, a1, 2
+; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
   %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x half> @llvm.vp.fsub.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
@@ -279,11 +611,47 @@
 }
 
 define <vscale x 32 x half> @vfsub_vf_nxv32f16(<vscale x 32 x half> %va, half %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfsub_vf_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsub_vf_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vfsub.vf v8, v8, fa0, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsub_vf_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vmv1r.v v1, v0
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v24, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24
+; ZVFHMIN-NEXT: csrr a2, vlenb
+; ZVFHMIN-NEXT: slli a1, a2, 1
+; ZVFHMIN-NEXT: sub a3, a0, a1
+; ZVFHMIN-NEXT: sltu a4, a0, a3
+; ZVFHMIN-NEXT: addi a4, a4, -1
+; ZVFHMIN-NEXT: and a3, a4, a3
+; ZVFHMIN-NEXT: srli a2, a2, 2
+; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a0, a1
+; ZVFHMIN-NEXT: .LBB22_2:
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v0, v1
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %elt.head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 32 x half> %elt.head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
   %v = call <vscale x 32 x half> @llvm.vp.fsub.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
@@ -291,11 +659,48 @@
 }
 
 define <vscale x 32 x half> @vfsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %va, half %b, i32 zeroext %evl) {
-; CHECK-LABEL: vfsub_vf_nxv32f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfsub.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfsub_vf_nxv32f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vfsub.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsub_vf_nxv32f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16
+; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; ZVFHMIN-NEXT: vmset.m v1
+; ZVFHMIN-NEXT: csrr a2, vlenb
+; ZVFHMIN-NEXT: slli a1, a2, 1
+; ZVFHMIN-NEXT: sub a3, a0, a1
+; ZVFHMIN-NEXT: sltu a4, a0, a3
+; ZVFHMIN-NEXT: addi a4, a4, -1
+; ZVFHMIN-NEXT: and a3, a4, a3
+; ZVFHMIN-NEXT: srli a2, a2, 2
+; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v1, a2
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a0, a1
+; ZVFHMIN-NEXT: .LBB23_2:
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v0, v1
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
   %elt.head = insertelement <vscale x 32 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 32 x half> %elt.head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
   %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
@@ -1,33 +1,63 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
 declare half @llvm.vp.reduce.fadd.nxv1f16(half, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
 
 define half @vpreduce_fadd_nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_fadd_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vpreduce_fadd_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vfredusum.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fadd_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfredusum.vs v8, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
   %r = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 %evl)
   ret half %r
 }
 
 define half @vpreduce_ord_fadd_nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_ord_fadd_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vpreduce_ord_fadd_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vfredosum.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_ord_fadd_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfredosum.vs v8, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
   %r = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 %evl)
   ret half %r
 }
@@ -35,27 +65,53 @@
 declare half @llvm.vp.reduce.fadd.nxv2f16(half, <vscale x 2 x half>, <vscale x 2 x i1>, i32)
 
 define half @vpreduce_fadd_nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_fadd_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vpreduce_fadd_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vfredusum.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fadd_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfredusum.vs v8, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
   %r = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> %m, i32 %evl)
   ret half %r
 }
 
 define half @vpreduce_ord_fadd_nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_ord_fadd_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vpreduce_ord_fadd_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vfredosum.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_ord_fadd_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfredosum.vs v8, v9, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
   %r = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> %m, i32 %evl)
   ret half %r
 }
@@ -63,27 +119,53 @@
 declare half @llvm.vp.reduce.fadd.nxv4f16(half, <vscale x 4 x half>, <vscale x 4 x i1>, i32)
 
 define half @vpreduce_fadd_nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_fadd_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vpreduce_fadd_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredusum.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fadd_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredusum.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
   %r = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> %m, i32 %evl)
   ret half %r
 }
 
 define half @vpreduce_ord_fadd_nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_ord_fadd_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vpreduce_ord_fadd_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vfredosum.vs v9, v8, v9, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_ord_fadd_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfredosum.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
   %r = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> %m, i32 %evl)
   ret half %r
 }
@@ -91,61 +173,215 @@
 declare half @llvm.vp.reduce.fadd.nxv64f16(half, <vscale x 64 x half>, <vscale x 64 x i1>, i32)
 
 define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_fadd_nxv64f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v0, a2
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: slli a1, a1, 2
-; CHECK-NEXT: vfmv.s.f v25, fa0
-; CHECK-NEXT: mv a2, a0
-; CHECK-NEXT: bltu a0, a1, .LBB6_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB6_2:
-; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t
-; CHECK-NEXT: sub a1, a0, a1
-; CHECK-NEXT: sltu a0, a0, a1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v25
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vpreduce_fadd_nxv64f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: csrr a1, vlenb
+; ZVFH-NEXT: srli a2, a1, 1
+; ZVFH-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; ZVFH-NEXT: vslidedown.vx v24, v0, a2
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: slli a1, a1, 2
+; ZVFH-NEXT: vfmv.s.f v25, fa0
+; ZVFH-NEXT: mv a2, a0
+; ZVFH-NEXT: bltu a0, a1, .LBB6_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: mv a2, a1
+; ZVFH-NEXT: .LBB6_2:
+; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; ZVFH-NEXT: vfredusum.vs v25, v8, v25, v0.t
+; ZVFH-NEXT: sub a1, a0, a1
+; ZVFH-NEXT: sltu a0, a0, a1
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: and a0, a0, a1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vmv1r.v v0, v24
+; ZVFH-NEXT: vfredusum.vs v25, v16, v25, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v25
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_fadd_nxv64f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: csrr a3, vlenb
+; ZVFHMIN-NEXT: srli a1, a3, 1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v1, v0, a1
+; ZVFHMIN-NEXT: slli a5, a3, 2
+; ZVFHMIN-NEXT: sub a1, a0, a5
+; ZVFHMIN-NEXT: sltu a2, a0, a1
+; ZVFHMIN-NEXT: addi a2, a2, -1
+; ZVFHMIN-NEXT: and a1, a2, a1
+; ZVFHMIN-NEXT: slli a2, a3, 1
+; ZVFHMIN-NEXT: sub a4, a1, a2
+; ZVFHMIN-NEXT: sltu a6, a1, a4
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: bltu a0, a5, .LBB6_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a0, a5
+; ZVFHMIN-NEXT: .LBB6_2:
+; ZVFHMIN-NEXT: addi a5, a6, -1
+; ZVFHMIN-NEXT: vsetvli a6, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: mv a6, a0
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB6_4
+; ZVFHMIN-NEXT: # %bb.3:
+; ZVFHMIN-NEXT: mv a6, a2
+; ZVFHMIN-NEXT: .LBB6_4:
+; ZVFHMIN-NEXT: and a4, a5, a4
+; ZVFHMIN-NEXT: vsetvli zero, a6, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: sub a5, a0, a2
+; ZVFHMIN-NEXT: sltu a0, a0, a5
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: and a0, a0, a5
+; ZVFHMIN-NEXT: srli a3, a3, 2
+; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3
+; ZVFHMIN-NEXT: vsetvli a5, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: bltu a1, a2, .LBB6_6
+; ZVFHMIN-NEXT: # %bb.5:
+; ZVFHMIN-NEXT: mv a1, a2
+; ZVFHMIN-NEXT: .LBB6_6:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v0, v1
+; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v1, a3
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: vsetvli zero, a4, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
   %r = call reassoc half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
   ret half %r
 }
 
 define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_ord_fadd_nxv64f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v0, a2
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: slli a1, a1, 2
-; CHECK-NEXT: vfmv.s.f v25, fa0
-; CHECK-NEXT: mv a2, a0
-; CHECK-NEXT: bltu a0, a1, .LBB7_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB7_2:
-; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t
-; CHECK-NEXT: sub a1, a0, a1
-; CHECK-NEXT: sltu a0, a0, a1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v25
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vpreduce_ord_fadd_nxv64f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: csrr a1, vlenb
+; ZVFH-NEXT: srli a2, a1, 1
+; ZVFH-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; ZVFH-NEXT: vslidedown.vx v24, v0, a2
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: slli a1, a1, 2
+; ZVFH-NEXT: vfmv.s.f v25, fa0
+; ZVFH-NEXT: mv a2, a0
+; ZVFH-NEXT: bltu a0, a1, .LBB7_2
+; ZVFH-NEXT: # %bb.1:
+; ZVFH-NEXT: mv a2, a1
+; ZVFH-NEXT: .LBB7_2:
+; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma
+; ZVFH-NEXT: vfredosum.vs v25, v8, v25, v0.t
+; ZVFH-NEXT: sub a1, a0, a1
+; ZVFH-NEXT: sltu a0, a0, a1
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: and a0, a0, a1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vmv1r.v v0, v24
+; ZVFH-NEXT: vfredosum.vs v25, v16, v25, v0.t
+; ZVFH-NEXT: vfmv.f.s fa0, v25
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vpreduce_ord_fadd_nxv64f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: csrr a3, vlenb
+; ZVFHMIN-NEXT: srli a1, a3, 1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v1, v0, a1
+; ZVFHMIN-NEXT: slli a5, a3, 2
+; ZVFHMIN-NEXT: sub a1, a0, a5
+; ZVFHMIN-NEXT: sltu a2, a0, a1
+; ZVFHMIN-NEXT: addi a2, a2, -1
+; ZVFHMIN-NEXT: and a1, a2, a1
+; ZVFHMIN-NEXT: slli a2, a3, 1
+; ZVFHMIN-NEXT: sub a4, a1, a2
+; ZVFHMIN-NEXT: sltu a6, a1, a4
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: bltu a0, a5, .LBB7_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a0, a5
+; ZVFHMIN-NEXT: .LBB7_2:
+; ZVFHMIN-NEXT: addi a5, a6, -1
+; ZVFHMIN-NEXT: vsetvli a6, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: mv a6, a0
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB7_4
+; ZVFHMIN-NEXT: # %bb.3:
+; ZVFHMIN-NEXT: mv a6, a2
+; ZVFHMIN-NEXT: .LBB7_4:
+; ZVFHMIN-NEXT: and a4, a5, a4
+; ZVFHMIN-NEXT: vsetvli zero, a6, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: sub a5, a0, a2
+; ZVFHMIN-NEXT: sltu a0, a0, a5
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: and a0, a0, a5
+; ZVFHMIN-NEXT: srli a3, a3, 2
+; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3
+; ZVFHMIN-NEXT: vsetvli a5, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: bltu a1, a2, .LBB7_6
+; ZVFHMIN-NEXT: # %bb.5:
+; ZVFHMIN-NEXT: mv a1, a2
+; ZVFHMIN-NEXT: .LBB7_6:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v0, v1
+; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v1, a3
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: vsetvli zero, a4, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
+; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT: ret
   %r = call half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
   ret half %r
 }