diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1426,6 +1426,8 @@
   defm vmsof : RISCVMaskedUnaryMOut;
   defm vmsif : RISCVMaskedUnaryMOut;
 
+  defm vfcvt_rdn_x_f_v : RISCVConversion;
+
   defm vfcvt_xu_f_v : RISCVConversion;
   defm vfcvt_x_f_v : RISCVConversion;
   defm vfcvt_rtz_xu_f_v : RISCVConversion;
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -26,6 +26,7 @@
   RISCVFrameLowering.cpp
   RISCVGatherScatterLowering.cpp
   RISCVInsertVSETVLI.cpp
+  RISCVInsertWriteFRM.cpp
   RISCVInstrInfo.cpp
   RISCVInstructionSelector.cpp
   RISCVISelDAGToDAG.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -55,6 +55,9 @@
 FunctionPass *createRISCVInsertVSETVLIPass();
 void initializeRISCVInsertVSETVLIPass(PassRegistry &);
 
+FunctionPass *createRISCVInsertWriteFRMPass();
+void initializeRISCVInsertWriteFRMPass(PassRegistry &);
+
 FunctionPass *createRISCVRedundantCopyEliminationPass();
 void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -237,6 +237,7 @@
   FMAXNUM_VL,
   MULHS_VL,
   MULHU_VL,
+  FCVT_X_F_VL,
   FP_TO_SINT_VL,
   FP_TO_UINT_VL,
   SINT_TO_FP_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1888,6 +1888,40 @@
   return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
 }
 
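+// Lower a vfcvt_rdn_* intrinsic to an FCVT_X_F_VL node carrying the rounding
+// mode as an explicit operand. The node always has the full (passthru, src,
+// mask, roundmode, vl, policy) operand list; an unmasked intrinsic is
+// canonicalized here with an all-ones VMSET_VL mask so that a single set of
+// ISel patterns can match both forms.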
+static SDValue lowerFCVTIntrinsics(SDValue Op, SelectionDAG &DAG,
+                                   const RISCVSubtarget &Subtarget,
+                                   unsigned RoundMode, bool IsMasked) {
+  SDLoc DL(Op);
+  MVT XLenVT = Subtarget.getXLenVT();
+  MVT VT = Op.getSimpleValueType();
+  SDValue RoundModeOp = DAG.getTargetConstant(RoundMode, DL, XLenVT);
+  SDValue Passthru;
+  SDValue Src;
+  SDValue Mask;
+  SDValue VL;
+  SDValue Policy;
+  if (IsMasked) {
+    Passthru = Op.getOperand(1);
+    Src = Op.getOperand(2);
+    Mask = Op.getOperand(3);
+    VL = Op.getOperand(4);
+    Policy = Op.getOperand(5);
+  } else {
+    Passthru = Op.getOperand(1);
+    Src = Op.getOperand(2);
+    VL = Op.getOperand(3);
+    MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
+    Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+    if (Passthru.isUndef())
+      Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
+    else
+      Policy = DAG.getTargetConstant(0, DL, XLenVT);
+  }
+  SmallVector<SDValue, 6> Ops = {Passthru, Src, Mask, RoundModeOp, VL, Policy};
+  return DAG.getNode(RISCVISD::FCVT_X_F_VL, DL, VT, Ops);
+}
+
 struct VIDSequence {
   int64_t StepNumerator;
   unsigned StepDenominator;
@@ -4887,6 +4921,14 @@
     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
                        Vec, VL);
   }
+  case Intrinsic::riscv_vfcvt_rdn_x_f_v: {
+    return lowerFCVTIntrinsics(Op, DAG, Subtarget, RISCVFPRndMode::RDN,
+                               /*IsMasked*/ false);
+  }
+  case Intrinsic::riscv_vfcvt_rdn_x_f_v_mask: {
+    return lowerFCVTIntrinsics(Op, DAG, Subtarget, RISCVFPRndMode::RDN,
+                               /*IsMasked*/ true);
+  }
   }
 
   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -11044,6 +11086,7 @@
   NODE_NAME_CASE(FMAXNUM_VL)
   NODE_NAME_CASE(MULHS_VL)
   NODE_NAME_CASE(MULHU_VL)
+  NODE_NAME_CASE(FCVT_X_F_VL)
   NODE_NAME_CASE(FP_TO_SINT_VL)
   NODE_NAME_CASE(FP_TO_UINT_VL)
   NODE_NAME_CASE(SINT_TO_FP_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertWriteFRM.cpp b/llvm/lib/Target/RISCV/RISCVInsertWriteFRM.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInsertWriteFRM.cpp
@@ -0,0 +1,358 @@
+//===- RISCVInsertWriteFRM.cpp - Insert WriteFRM instructions ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function pass that inserts ReadFRM/WriteFRM
+// instructions for pseudos which need a static rounding mode.
+//
+// The pass consists of 3 phases:
+//
+// Phases 1 & 2 calculate the reaching definition of FRM using an iterative
+// worklist algorithm.
+// Phase 3 inserts a WriteFRM instruction in each basic block where the
+// incoming FRM is not compatible.
+//
+// TODO: Handle functions that have the strictfp attribute.
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <queue>
+#include <vector>
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-insert-writefrm"
+#define RISCV_INSERT_WRITEFRM_NAME "RISCV Insert WriteFRM pass"
+
+namespace {
+
+class FRMInfo {
+  // DYN represents the FRM state controlled by FENV.
+  // Values other than DYN represent a static rounding mode: RNE, RTZ, RDN,
+  // RUP, and RMM.
+  unsigned FRMImm;
+
+  enum : uint8_t {
+    Uninitialized,
+    FRMIsImm,
+    Unknown,
+  } State = Uninitialized;
+
+public:
+  FRMInfo() : FRMImm(0) {}
+
+  static FRMInfo getUnknown() {
+    FRMInfo Info;
+    Info.setUnknown();
+    return Info;
+  }
+
+  bool isValid() const { return State != Uninitialized; }
+  void setUnknown() { State = Unknown; }
+  bool isUnknown() const { return State == Unknown; }
+
+  void setFRMImm(unsigned Imm) {
+    FRMImm = Imm;
+    State = FRMIsImm;
+  }
+
+  void setClean() {
+    FRMImm = RISCVFPRndMode::DYN;
+    State = FRMIsImm;
+  }
+
+  bool hasFRMImm() const { return State == FRMIsImm; }
+
+  unsigned getFRMImm() const {
+    assert(hasFRMImm());
+    return FRMImm;
+  }
+
+  bool isClean() const { return hasFRMImm() && FRMImm == RISCVFPRndMode::DYN; }
+
+  bool isDirty() const { return !isClean(); }
+
+  bool operator==(const FRMInfo &Other) const {
+    // Uninitialized is only equal to another Uninitialized.
+    if (!isValid())
+      return !Other.isValid();
+    if (!Other.isValid())
+      return !isValid();
+
+    // Unknown is only equal to another Unknown.
+    if (isUnknown())
+      return Other.isUnknown();
+    if (Other.isUnknown())
+      return isUnknown();
+
+    return FRMImm == Other.FRMImm;
+  }
+
+  // Calculate the FRMInfo visible to a block assuming this and Other are
+  // both predecessors.
+  FRMInfo intersect(const FRMInfo &Other) const {
+    // If the new value isn't valid, ignore it.
+    if (!Other.isValid())
+      return *this;
+
+    // If this value isn't valid, this must be the first predecessor, use it.
+    if (!isValid())
+      return Other;
+
+    // If either is unknown, the result is unknown.
+    if (isUnknown() || Other.isUnknown())
+      return FRMInfo::getUnknown();
+
+    // If we have an exact match, return it.
+    if (*this == Other)
+      return *this;
+
+    // Otherwise the result is unknown.
+    return FRMInfo::getUnknown();
+  }
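+
+  // For example, intersecting RDN from one predecessor with RDN from another
+  // yields RDN and no write is needed, while intersecting RDN with RTZ (or
+  // with Unknown) yields Unknown, which forces Phase 3 to emit a write in
+  // front of the first instruction in the block that cares about FRM.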
+
+  // Calculate the FRMInfo visible at the end of the block assuming this
+  // is the predecessor value, and Other is the change made by this block.
+  FRMInfo merge(const FRMInfo &Other) const {
+    assert(isValid() && "Can only merge with a valid FRMInfo");
+
+    // Nothing changed from the predecessor, keep it.
+    if (!Other.isValid())
+      return *this;
+
+    // Otherwise just use whatever is in this block.
+    return Other;
+  }
+};
+
+struct BlockData {
+  // The FRMInfo that represents the net changes to the FRM register
+  // made by this block. Calculated in Phase 1.
+  FRMInfo Change;
+
+  // The FRMInfo that represents the FRM state on exit from this
+  // block. Calculated in Phase 2.
+  FRMInfo Exit;
+
+  // The FRMInfo that represents the FRM state from all predecessor
+  // blocks. Calculated in Phase 2, and used by Phase 3.
+  FRMInfo Pred;
+
+  // Keeps track of whether the block is already in the queue.
+  bool InQueue = false;
+
+  BlockData() {}
+};
+
+class RISCVInsertWriteFRM : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+
+  Register SavedFRMReg;
+  std::vector<BlockData> BlockInfo;
+  std::queue<const MachineBasicBlock *> WorkList;
+
+public:
+  static char ID;
+
+  RISCVInsertWriteFRM() : MachineFunctionPass(ID) {
+    initializeRISCVInsertWriteFRMPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_INSERT_WRITEFRM_NAME; }
+
+private:
+  bool computeFRMChanges(const MachineBasicBlock &MBB);
+  void computeIncomingFRM(const MachineBasicBlock &MBB);
+  void emitWriteFRM(MachineBasicBlock &MBB);
+};
+
+Optional<unsigned> getFRMOpIdx(const MachineInstr &MI) {
+  uint64_t TSFlags = MI.getDesc().TSFlags;
+  if (!RISCVII::hasFPRndModeOp(TSFlags))
+    return None;
+
+  bool HasVL = RISCVII::hasVLOp(TSFlags);
+  bool HasSEW = RISCVII::hasSEWOp(TSFlags);
+  (void)HasVL;
+  (void)HasSEW;
+  assert(HasVL && HasSEW);
+  return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3;
+}
+
+} // end anonymous namespace
+
+char RISCVInsertWriteFRM::ID = 0;
+
+INITIALIZE_PASS(RISCVInsertWriteFRM, DEBUG_TYPE, RISCV_INSERT_WRITEFRM_NAME,
+                false, false)
+
+bool RISCVInsertWriteFRM::computeFRMChanges(const MachineBasicBlock &MBB) {
+  bool NeedFRMChange = false;
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+  for (const MachineInstr &MI : MBB) {
+    if (auto Idx = getFRMOpIdx(MI)) {
+      const MachineOperand &FRMOp = MI.getOperand(Idx.getValue());
+
+      BBInfo.Change.setFRMImm(FRMOp.getImm());
+
+      if (BBInfo.Change.isDirty())
+        NeedFRMChange = true;
+    }
+    if (MI.isCall() || MI.isInlineAsm())
+      BBInfo.Change.setClean();
+  }
+  // The initial exit state is the last change made in the block.
+  BBInfo.Exit = BBInfo.Change;
+  return NeedFRMChange;
+}
+
+void RISCVInsertWriteFRM::computeIncomingFRM(const MachineBasicBlock &MBB) {
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+  BBInfo.InQueue = false;
+
+  FRMInfo InInfo;
+  if (MBB.pred_empty()) {
+    // FRM at function entry is controlled by FENV.
+    InInfo.setClean();
+  } else {
+    for (MachineBasicBlock *P : MBB.predecessors())
+      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
+  }
+
+  // If we don't have any valid predecessor value, wait until we do.
+  if (!InInfo.isValid())
+    return;
+
+  BBInfo.Pred = InInfo;
+
+  FRMInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change);
+
+  // If the new exit value matches the old exit value, we don't need to revisit
+  // any blocks.
+  if (BBInfo.Exit == TmpStatus)
+    return;
+
+  BBInfo.Exit = TmpStatus;
+
+  // Add the successors to the work list so we can propagate the changed exit
+  // status.
+  for (MachineBasicBlock *S : MBB.successors())
+    if (!BlockInfo[S->getNumber()].InQueue) {
+      BlockInfo[S->getNumber()].InQueue = true;
+      WorkList.push(S);
+    }
+}
+
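+// Phase 3: walk each block with the incoming FRM state computed in Phase 2.
+// For a pseudo carrying a static rounding-mode immediate, e.g.
+//   %2:vr = nofpexcept PseudoVFCVT_X_F_V_M1 %0, 2 /*RDN*/, $noreg, 6
+// emit a WriteFRMImm in front (unless FRM already holds that value), rewrite
+// the operand to DYN and add an implicit use of $frm, so the instruction is
+// known to read the CSR from that point on.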
+void RISCVInsertWriteFRM::emitWriteFRM(MachineBasicBlock &MBB) {
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+  FRMInfo CurFRMInfo = BBInfo.Pred;
+
+  for (MachineInstr &MI : MBB) {
+    if ((MI.readsRegister(RISCV::FRM) || MI.isCall() || MI.isInlineAsm()) &&
+        CurFRMInfo.isDirty()) {
+      BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRM))
+          .addReg(SavedFRMReg);
+      CurFRMInfo.setClean();
+      continue;
+    }
+
+    if (auto Idx = getFRMOpIdx(MI)) {
+      MachineOperand &FRMOp = MI.getOperand(Idx.getValue());
+      FRMInfo NewFRMInfo;
+      NewFRMInfo.setFRMImm(FRMOp.getImm());
+
+      if (NewFRMInfo == CurFRMInfo)
+        continue;
+
+      BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRMImm))
+          .addImm(NewFRMInfo.getFRMImm());
+
+      FRMOp.setImm(RISCVFPRndMode::DYN);
+      MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false,
+                                              /*isImp*/ true));
+
+      CurFRMInfo = NewFRMInfo;
+      continue;
+    }
+  }
+
+  // Restore FRM before leaving the function.
+  if (MBB.succ_empty() && CurFRMInfo.isDirty()) {
+    MachineInstr &MI = *MBB.getFirstTerminator();
+    BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteFRM))
+        .addReg(SavedFRMReg);
+  }
+}
+
+bool RISCVInsertWriteFRM::runOnMachineFunction(MachineFunction &MF) {
+  // Skip if the vector extension is not enabled.
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  if (!ST.hasVInstructions())
+    return false;
+
+  TII = ST.getInstrInfo();
+  MRI = &MF.getRegInfo();
+
+  assert(BlockInfo.empty() && "Expect empty block infos");
+  BlockInfo.resize(MF.getNumBlockIDs());
+
+  bool NeedFRMChange = false;
+  // Phase 1 - Collect FRM changes.
+  for (const MachineBasicBlock &MBB : MF)
+    NeedFRMChange |= computeFRMChanges(MBB);
+
+  // FIXME: Currently we only handle non-strictfp functions.
+  // For strictfp functions we will restore the FENV FRM in a different way.
+  if (NeedFRMChange)
+    assert(!MF.getFunction().hasFnAttribute(Attribute::StrictFP) &&
+           "RISCVInsertWriteFRM does not support the strict FP model yet");
+
+  // We can skip the rest if no instruction needs a static rounding mode.
+  if (NeedFRMChange) {
+    // Phase 2 - Gather the FRM info from predecessors.
+    for (MachineBasicBlock &MBB : MF) {
+      WorkList.push(&MBB);
+      BlockInfo[MBB.getNumber()].InQueue = true;
+    }
+    while (!WorkList.empty()) {
+      const MachineBasicBlock &MBB = *WorkList.front();
+      WorkList.pop();
+      computeIncomingFRM(MBB);
+    }
+
+    // For non-strictfp functions, we assume the FENV FRM does not change
+    // after entry.
+    MachineBasicBlock &EntryMBB = MF.front();
+    SavedFRMReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+    BuildMI(EntryMBB, EntryMBB.getFirstNonPHI(), DebugLoc(),
+            TII->get(RISCV::ReadFRM), SavedFRMReg);
+
+    // Phase 3 - Insert the WriteFRMs needed in each basic block.
+    for (MachineBasicBlock &MBB : MF)
+      emitWriteFRM(MBB);
+  }
+
+  BlockInfo.clear();
+
+  return NeedFRMChange;
+}
+
+/// Returns an instance of the Insert WriteFRM pass.
+FunctionPass *llvm::createRISCVInsertWriteFRMPass() {
+  return new RISCVInsertWriteFRM();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -133,6 +133,16 @@
 def riscv_sint_to_fp_vl : SDNode<"RISCVISD::SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
 def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
 
+// Input: (passthru, vector, mask, roundmode, vl, policy)
+def SDT_RISCVFP2IOp_RM_VL : SDTypeProfile<1, 6, [
+  SDTCisInt<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>,
+  SDTCisFP<2>, SDTCisSameNumEltsAs<0, 2>,
+  SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>,
+  SDTCisVT<4, XLenVT>, SDTCisVT<5, XLenVT>, SDTCisVT<6, XLenVT>
+]>;
+
+def riscv_fcvt_x_f_vl : SDNode<"RISCVISD::FCVT_X_F_VL", SDT_RISCVFP2IOp_RM_VL>;
+
 def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
                             SDTypeProfile<1, 5, [SDTCVecEltisVT<0, i1>,
                                                  SDTCisVec<1>,
@@ -619,6 +629,35 @@
   }
 }
 
+multiclass VPatConvertFP2ISDNode_RM_V_VL<SDNode vop, string instruction_name> {
+  foreach fvti = AllFloatVectors in {
+    defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+    def : Pat<(ivti.Vector (vop (ivti.Vector undef),
+                                (fvti.Vector fvti.RegClass:$rs1),
+                                (fvti.Mask true_mask),
+                                (XLenVT timm:$rm),
+                                VLOpFrag,
+                                TAIL_AGNOSTIC)),
+              (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
+                  fvti.RegClass:$rs1,
+                  (XLenVT timm:$rm),
+                  GPR:$vl, ivti.Log2SEW)>;
+    def : Pat<(ivti.Vector (vop (ivti.Vector ivti.RegClass:$merge),
+                                (fvti.Vector fvti.RegClass:$rs1),
+                                (ivti.Mask V0),
+                                (XLenVT timm:$rm),
+                                VLOpFrag,
+                                (XLenVT timm:$policy))),
+              (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX # "_MASK")
+                  ivti.RegClass:$merge,
+                  fvti.RegClass:$rs1,
+                  (ivti.Mask V0),
+                  (XLenVT timm:$rm),
+                  GPR:$vl, ivti.Log2SEW,
+                  (XLenVT timm:$policy))>;
+  }
+}
+
 multiclass VPatConvertI2FPSDNode_V_VL<SDNode vop, string instruction_name> {
   foreach fvti = AllFloatVectors in {
     defvar ivti = GetIntVTypeInfo<fvti>.Vti;
@@ -1629,6 +1668,8 @@
 defm : VPatConvertI2FPSDNode_V_VL<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
 defm : VPatConvertI2FPSDNode_V_VL<riscv_uint_to_fp_vl, "PseudoVFCVT_F_XU_V">;
 
+defm : VPatConvertFP2ISDNode_RM_V_VL<riscv_fcvt_x_f_vl, "PseudoVFCVT_X_F_V">;
+
 // 14.18.
Widening Floating-Point/Integer Type-Convert Instructions defm : VPatWConvertFP2ISDNode_V_VL; defm : VPatWConvertFP2ISDNode_V_VL; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -47,6 +47,7 @@ initializeRISCVSExtWRemovalPass(*PR); initializeRISCVExpandPseudoPass(*PR); initializeRISCVInsertVSETVLIPass(*PR); + initializeRISCVInsertWriteFRMPass(*PR); } static StringRef computeDataLayout(const Triple &TT) { @@ -213,6 +214,7 @@ if (TM->getOptLevel() != CodeGenOpt::None) addPass(createRISCVMergeBaseOffsetOptPass()); addPass(createRISCVInsertVSETVLIPass()); + addPass(createRISCVInsertWriteFRMPass()); } void RISCVPassConfig::addPostRegAlloc() { diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -46,6 +46,7 @@ ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: Local Stack Slot Allocation ; CHECK-NEXT: RISCV Insert VSETVLI pass +; CHECK-NEXT: RISCV Insert WriteFRM pass ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass ; CHECK-NEXT: Fast Register Allocator diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -93,6 +93,7 @@ ; RV64-NEXT: RISCV sext.w Removal ; CHECK-NEXT: RISCV Merge Base Offset ; CHECK-NEXT: RISCV Insert VSETVLI pass +; CHECK-NEXT: RISCV Insert WriteFRM pass ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Process Implicit Definitions ; CHECK-NEXT: Remove unreachable machine basic blocks diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rdn-x-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rdn-x-f.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rdn-x-f.ll @@ -0,0 +1,739 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i16.nxv1f16( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv1i16_nxv1f16( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv1i16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i16.nxv1f16( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv1i16.nxv1f16( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv1i16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv1i16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv1i16.nxv1f16( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare 
@llvm.riscv.vfcvt.rdn.x.f.v.nxv2i16.nxv2f16( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv2i16_nxv2f16( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv2i16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv2i16.nxv2f16( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv2i16.nxv2f16( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv2i16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv2i16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv2i16.nxv2f16( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv4i16.nxv4f16( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv4i16_nxv4f16( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv4i16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv4i16.nxv4f16( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv4i16.nxv4f16( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv4i16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv4i16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv4i16.nxv4f16( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv8i16.nxv8f16( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv8i16_nxv8f16( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv8i16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv8i16.nxv8f16( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv8i16.nxv8f16( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv8i16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv8i16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv8i16.nxv8f16( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv16i16.nxv16f16( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv16i16_nxv16f16( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv16i16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: 
vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv16i16.nxv16f16( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv16i16.nxv16f16( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv16i16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv16i16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv16i16.nxv16f16( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv32i16.nxv32f16( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv32i16_nxv32f16( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv32i16_nxv32f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv32i16.nxv32f16( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv32i16.nxv32f16( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv32i16_nxv32f16( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv32i16_nxv32f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv32i16.nxv32f16( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i32.nxv1f32( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv1i32_nxv1f32( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv1i32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i32.nxv1f32( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv1i32.nxv1f32( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv1i32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv1i32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv1i32.nxv1f32( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv2i32.nxv2f32( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv2i32_nxv2f32( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv2i32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv2i32.nxv2f32( + undef, + %0, + iXLen %1) + + ret %a +} + 
+declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv2i32.nxv2f32( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv2i32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv2i32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv2i32.nxv2f32( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv4i32.nxv4f32( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv4i32_nxv4f32( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv4i32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv4i32.nxv4f32( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv4i32.nxv4f32( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv4i32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv4i32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv4i32.nxv4f32( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv8i32.nxv8f32( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv8i32_nxv8f32( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv8i32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv8i32.nxv8f32( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv8i32.nxv8f32( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv8i32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv8i32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv8i32.nxv8f32( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv16i32.nxv16f32( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv16i32_nxv16f32( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv16i32_nxv16f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv16i32.nxv16f32( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv16i32.nxv16f32( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv16i32_nxv16f32( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv16i32_nxv16f32: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv16i32.nxv16f32( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i64.nxv1f64( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv1i64_nxv1f64( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv1i64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i64.nxv1f64( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv1i64.nxv1f64( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv1i64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv1i64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv1i64.nxv1f64( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv2i64.nxv2f64( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv2i64_nxv2f64( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv2i64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv2i64.nxv2f64( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv2i64.nxv2f64( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv2i64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv2i64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv2i64.nxv2f64( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv4i64.nxv4f64( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv4i64_nxv4f64( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv4i64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv4i64.nxv4f64( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv4i64.nxv4f64( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv4i64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv4i64_nxv4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv4i64.nxv4f64( + %0, + 
%1, + %2, + iXLen %3, iXLen 1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv8i64.nxv8f64( + , + , + iXLen); + +define @intrinsic_vfcvt_rdn.x.f.v_nxv8i64_nxv8f64( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_rdn.x.f.v_nxv8i64_nxv8f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv8i64.nxv8f64( + undef, + %0, + iXLen %1) + + ret %a +} + +declare @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv8i64.nxv8f64( + , + , + , + iXLen, + iXLen); + +define @intrinsic_vfcvt_mask_rdn.x.f.v_nxv8i64_nxv8f64( %0, %1, %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfcvt_mask_rdn.x.f.v_nxv8i64_nxv8f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.mask.nxv8i64.nxv8f64( + %0, + %1, + %2, + iXLen %3, iXLen 1) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/writefrm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/writefrm-insert.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/writefrm-insert.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs -O2 < %s | FileCheck %s + +declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64) + +declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i64.nxv1f64( + , + , + i64); + +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i64.nxv1f64( + , + , + i64); + +; Check FRM is restored before leaving the block +define @test1(i64 %avl, i8 zeroext %cond, %a) nounwind { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: frrm a2 +; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu +; CHECK-NEXT: beqz a1, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: # %if.else +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 0) + %tobool = icmp eq i8 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %1 = tail call @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i64.nxv1f64( undef, %a, i64 %0) + br label %if.end + +if.else: ; preds = %entry + %2 = tail call @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i64.nxv1f64( undef, %a, i64 %0) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %c.0 = phi [ %1, %if.then ], [ %2, %if.else ] + ret %c.0 +} + +declare @llvm.riscv.vfadd.nxv16f32.nxv16f32(, , , i64) +declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv16i32.nxv16f32( + , + , + i64); + +declare @llvm.riscv.vle.nxv16f32.i64(, * nocapture, i64) +declare void @llvm.riscv.vse.nxv16i32.i64(, * nocapture, i64) + +define void @test2(i64 %n, i32 * nocapture %dst, float* nocapture readonly %x, float* nocapture %y) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a5, a0, e32, m8, ta, mu +; CHECK-NEXT: frrm a6 +; CHECK-NEXT: beqz a5, .LBB1_2 +; CHECK-NEXT: .LBB1_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: slli a4, a5, 2 +; CHECK-NEXT: vle32.v v16, (a3) +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a3, a3, a4 +; CHECK-NEXT: fsrm a6 +; 
CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: fsrmi 2 +; CHECK-NEXT: vfcvt.x.f.v v8, v8 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: sub a0, a0, a5 +; CHECK-NEXT: vsetvli a5, a0, e32, m8, ta, mu +; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: bnez a5, .LBB1_1 +; CHECK-NEXT: .LBB1_2: # %for.end +; CHECK-NEXT: fsrm a6 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3) + %cmp.not13 = icmp eq i64 %0, 0 + br i1 %cmp.not13, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %1 = phi i64 [ %12, %for.body ], [ %0, %entry ] + %n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ] + + %x.addr.015 = phi float* [ %4, %for.body ], [ %x, %entry ] + %y.addr.014 = phi float* [ %7, %for.body ], [ %y, %entry ] + %dst.addr = phi i32* [ %11, %for.body ], [ %dst, %entry ] + + %2 = bitcast float* %x.addr.015 to * + %3 = tail call @llvm.riscv.vle.nxv16f32.i64( undef, * %2, i64 %1) + %4 = getelementptr inbounds float, float* %x.addr.015, i64 %1 + + %5 = bitcast float* %y.addr.014 to * + %6 = tail call @llvm.riscv.vle.nxv16f32.i64( undef, * %5, i64 %1) + %7 = getelementptr inbounds float, float* %y.addr.014, i64 %1 + + %8 = tail call @llvm.riscv.vfadd.nxv16f32.nxv16f32( undef, %3, %6, i64 %1) + %9 = tail call @llvm.riscv.vfcvt.rdn.x.f.v.nxv16i32.nxv16f32( undef, %8, i64 %1) + + %10 = bitcast i32* %dst.addr to * + tail call void @llvm.riscv.vse.nxv16i32.i64( %9, * %10, i64 %1) + %11 = getelementptr inbounds i32, i32* %dst.addr, i64 %1 + + %sub = sub i64 %n.addr.016, %1 + %12 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 2, i64 3) + %cmp.not = icmp eq i64 %12, 0 + br i1 %cmp.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/writefrm-insert.mir b/llvm/test/CodeGen/RISCV/rvv/writefrm-insert.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/writefrm-insert.mir @@ -0,0 +1,173 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-insert-writefrm | FileCheck %s +--- | + ; ModuleID = '' + source_filename = "" + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" + target triple = "riscv64" + + define @fcvt_rdn( %0, i64 %1) #0 { + entry: + %a = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i64.nxv1f64.i64( undef, %0, i64 %1) + ret %a + } + + define @fadd_fcvt_rdn( %0, %1, i64 %2) #0 { + entry: + %a = call @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( undef, %0, %1, i64 %2) + %b = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i64.nxv1f64.i64( undef, %a, i64 %2) + ret %b + } + + define @fcvt_rdn_fadd( %0, %1, i64 %2) #0 { + entry: + %b = call @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i64.nxv1f64.i64( undef, %0, i64 %2) + %c = call @llvm.riscv.vfcvt.f.x.v.nxv1f64.nxv1i64.i64( undef, %b, i64 %2) + %a = call @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( undef, %c, %1, i64 %2) + ret %a + } + + ; Function Attrs: nounwind readnone + declare @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64(, , , i64) #1 + + ; Function Attrs: nounwind readnone + declare @llvm.riscv.vfcvt.rdn.x.f.v.nxv1i64.nxv1f64.i64(, , i64) #1 + + ; Function Attrs: nounwind readnone + declare @llvm.riscv.vfcvt.f.x.v.nxv1f64.nxv1i64.i64(, , i64) #1 + + attributes #0 = { "target-features"="+m,+d,+v" } + attributes #1 = { nounwind readnone "target-features"="+m,+d,+v" } + +... 
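+# fcvt_rdn: a single statically rounded convert. Expect a ReadFRM save in the
+# entry block, a WriteFRMImm 2 in front of the convert, the rounding-mode
+# operand rewritten to DYN (7) with an implicit $frm use, and a WriteFRM
+# restore before the return.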
+--- +name: fcvt_rdn +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: gprnox0 } + - { id: 2, class: vr } +liveins: + - { reg: '$v8', virtual-reg: '%0' } + - { reg: '$x10', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8, $x10 + + ; CHECK-LABEL: name: fcvt_rdn + ; CHECK: liveins: $v8, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ReadFRM:%[0-9]+]]:gpr = ReadFRM implicit $frm + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: WriteFRMImm 2, implicit-def $frm + ; CHECK-NEXT: %2:vr = nofpexcept PseudoVFCVT_X_F_V_M1 [[COPY1]], 7, $noreg, 6, implicit $vl, implicit $vtype, implicit $frm + ; CHECK-NEXT: $v8 = COPY %2 + ; CHECK-NEXT: WriteFRM [[ReadFRM]], implicit-def $frm + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:gprnox0 = COPY $x10 + %0:vr = COPY $v8 + dead $x0 = PseudoVSETVLI %1, 88, implicit-def $vl, implicit-def $vtype + %2:vr = nofpexcept PseudoVFCVT_X_F_V_M1 %0, 2, $noreg, 6, implicit $vl, implicit $vtype + $v8 = COPY %2 + PseudoRET implicit $v8 + +... +--- +name: fadd_fcvt_rdn +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: gprnox0 } + - { id: 3, class: vr } + - { id: 4, class: vr } +liveins: + - { reg: '$v8', virtual-reg: '%0' } + - { reg: '$v9', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8, $v9, $x10 + + ; CHECK-LABEL: name: fadd_fcvt_rdn + ; CHECK: liveins: $v8, $v9, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ReadFRM:%[0-9]+]]:gpr = ReadFRM implicit $frm + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: %3:vr = nofpexcept PseudoVFADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: WriteFRMImm 2, implicit-def $frm + ; CHECK-NEXT: %4:vr = nofpexcept PseudoVFCVT_X_F_V_M1 killed %3, 7, $noreg, 6, implicit $vl, implicit $vtype, implicit $frm + ; CHECK-NEXT: $v8 = COPY %4 + ; CHECK-NEXT: WriteFRM [[ReadFRM]], implicit-def $frm + ; CHECK-NEXT: PseudoRET implicit $v8 + %2:gprnox0 = COPY $x10 + %1:vr = COPY $v9 + %0:vr = COPY $v8 + dead $x0 = PseudoVSETVLI %2, 88, implicit-def $vl, implicit-def $vtype + %3:vr = nofpexcept PseudoVFADD_VV_M1 %0, %1, $noreg, 6, implicit $frm, implicit $vl, implicit $vtype + %4:vr = nofpexcept PseudoVFCVT_X_F_V_M1 killed %3, 2, $noreg, 6, implicit $vl, implicit $vtype + $v8 = COPY %4 + PseudoRET implicit $v8 + +... 
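+# fcvt_rdn_fadd: the FRM restore must be placed before the first dynamic
+# rounding-mode reader (the PseudoVFCVT_F_X_V, which has an implicit $frm
+# use), not deferred to the end of the block.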
+--- +name: fcvt_rdn_fadd +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: gprnox0 } + - { id: 3, class: vr } + - { id: 4, class: vr } + - { id: 5, class: vr } +liveins: + - { reg: '$v8', virtual-reg: '%0' } + - { reg: '$v9', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8, $v9, $x10 + + ; CHECK-LABEL: name: fcvt_rdn_fadd + ; CHECK: liveins: $v8, $v9, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ReadFRM:%[0-9]+]]:gpr = ReadFRM implicit $frm + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: WriteFRMImm 2, implicit-def $frm + ; CHECK-NEXT: %3:vr = nofpexcept PseudoVFCVT_X_F_V_M1 [[COPY2]], 7, $noreg, 6, implicit $vl, implicit $vtype, implicit $frm + ; CHECK-NEXT: WriteFRM [[ReadFRM]], implicit-def $frm + ; CHECK-NEXT: %4:vr = nofpexcept PseudoVFCVT_F_X_V_M1 killed %3, 7, $noreg, 6, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: %5:vr = nofpexcept PseudoVFADD_VV_M1 killed %4, [[COPY1]], $noreg, 6, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v8 = COPY %5 + ; CHECK-NEXT: PseudoRET implicit $v8 + %2:gprnox0 = COPY $x10 + %1:vr = COPY $v9 + %0:vr = COPY $v8 + dead $x0 = PseudoVSETVLI %2, 88, implicit-def $vl, implicit-def $vtype + %3:vr = nofpexcept PseudoVFCVT_X_F_V_M1 %0, 2, $noreg, 6, implicit $vl, implicit $vtype + %4:vr = nofpexcept PseudoVFCVT_F_X_V_M1 killed %3, 7, $noreg, 6, implicit $frm, implicit $vl, implicit $vtype + %5:vr = nofpexcept PseudoVFADD_VV_M1 killed %4, %1, $noreg, 6, implicit $frm, implicit $vl, implicit $vtype + $v8 = COPY %5 + PseudoRET implicit $v8 + +...