Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -23839,3 +23839,45 @@
 The '``llvm.preserve.struct.access.index``' intrinsic produces the same result
 as a getelementptr with base ``base`` and access operands ``{0, gep_index}``.
+
+'``llvm.fptrunc.round``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <ty2>
+      @llvm.fptrunc.round(<type> <value>, metadata <rounding mode>)
+
+Overview:
+"""""""""
+
+The '``llvm.fptrunc.round``' intrinsic truncates
+:ref:`floating-point <t_floating>` ``value`` to type ``ty2``
+with a specified rounding mode.
+
+Arguments:
+""""""""""
+
+The first argument to the '``llvm.fptrunc.round``' intrinsic must be
+:ref:`floating point <t_floating>` or :ref:`vector <t_vector>` of floating
+point values. This argument must be larger in size than the result.
+
+The '``llvm.fptrunc.round``' intrinsic takes a :ref:`floating-point
+<t_floating>` value to cast and a :ref:`floating-point <t_floating>` type
+to cast it to. The size of ``value`` must be larger than the size of ``ty2``.
+
+The second argument specifies the rounding mode as described in the constrained
+intrinsics section.
+For this intrinsic, the "round.dynamic" mode is not supported.
+
+Semantics:
+""""""""""
+
+The '``llvm.fptrunc.round``' intrinsic casts a ``value`` from a larger
+:ref:`floating-point <t_floating>` type to a smaller :ref:`floating-point
+<t_floating>` type.
+This intrinsic is assumed to execute in the default :ref:`floating-point
+environment <floatenv>` *except* for the rounding mode.
Index: llvm/include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -462,6 +462,9 @@
   STRICT_FSETCC,
   STRICT_FSETCCS,
 
+  // FPTRUNC_ROUND - This corresponds to the fptrunc_round intrinsic.
+  FPTRUNC_ROUND,
+
   /// FMA - Perform a * b + c with no intermediate rounding step.
   FMA,
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -901,6 +901,12 @@
 }
 
 // FIXME: Consider maybe adding intrinsics for sitofp, uitofp.
+
+// Truncate a floating point number with a specific rounding mode
+def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
+                                              [ llvm_anyfloat_ty, llvm_metadata_ty ],
+                                              [ IntrNoMem, IntrWillReturn ]>;
+
 //===------------------------- Expect Intrinsics --------------------------===//
 //
 def int_expect : DefaultAttrsIntrinsic<[llvm_anyint_ty],
Index: llvm/include/llvm/Support/TargetOpcodes.def
===================================================================
--- llvm/include/llvm/Support/TargetOpcodes.def
+++ llvm/include/llvm/Support/TargetOpcodes.def
@@ -321,6 +321,9 @@
 /// Generic freeze.
 HANDLE_TARGET_OPCODE(G_FREEZE)
 
+/// INTRINSIC fptrunc_round intrinsic.
+HANDLE_TARGET_OPCODE(G_INTRINSIC_FPTRUNC_ROUND)
+
 /// INTRINSIC trunc intrinsic.
 HANDLE_TARGET_OPCODE(G_INTRINSIC_TRUNC)
Index: llvm/include/llvm/Target/GenericOpcodes.td
===================================================================
--- llvm/include/llvm/Target/GenericOpcodes.td
+++ llvm/include/llvm/Target/GenericOpcodes.td
@@ -965,6 +965,12 @@
 //------------------------------------------------------------------------------
 // Opcodes for LLVM Intrinsics
 //------------------------------------------------------------------------------
+def G_INTRINSIC_FPTRUNC_ROUND : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$src1, i32imm:$round_mode);
+  let hasSideEffects = false;
+}
+
 def G_INTRINSIC_TRUNC : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins type0:$src1);
Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2251,6 +2251,24 @@
     Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
     return CLI->lowerCall(MIRBuilder, Info);
   }
+  case Intrinsic::fptrunc_round: {
+    unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+
+    // Convert the metadata argument to a constant integer
+    Metadata *MD = cast<MetadataAsValue>(CI.getOperand(1))->getMetadata();
+    Optional<RoundingMode> RoundMode =
+        convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+    MachineInstrBuilder MIB = MIRBuilder.buildInstr(
+        TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND, {getOrCreateVReg(CI)},
+        {getOrCreateVReg(*CI.getArgOperand(0))});
+
+    // Add the rounding mode as an integer
+    MIB.addImm((int)RoundMode.getValue());
+    MIB->setFlags(Flags);
+
+    return MIB;
+  }
 #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
   case Intrinsic::INTRINSIC:
 #include "llvm/IR/ConstrainedOps.def"
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6332,6 +6332,40 @@
 #include "llvm/IR/VPIntrinsics.def"
     visitVectorPredicationIntrinsic(cast<VPIntrinsic>(&I));
     return;
+  case Intrinsic::fptrunc_round: {
+    // Build the operand list.
+    SmallVector<SDValue, 2> Ops;
+
+    // Get the last argument, the metadata, and convert it to an integer in
+    // the call
+    Metadata *MD = cast<MetadataAsValue>(I.getOperand(1))->getMetadata();
+    Optional<RoundingMode> RoundMode =
+        convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+    TargetLowering::IntrinsicInfo Info;
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+    // Add the first operand.
+    Ops.push_back(getValue(I.getArgOperand(0)));
+
+    // Convert the last operand to an integer and add it to the list
+    Ops.push_back(DAG.getTargetConstant((int)RoundMode.getValue(), sdl,
+                                        TLI.getPointerTy(DAG.getDataLayout())));
+
+    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+    // Propagate fast-math-flags from IR to node(s).
+    SDNodeFlags Flags;
+    if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
+      Flags.copyFMF(*FPMO);
+    SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+
+    SDValue Result;
+    Result = DAG.getNode(ISD::FPTRUNC_ROUND, getCurSDLoc(), VT, Ops);
+    setValue(&I, Result);
+
+    return;
+  }
   case Intrinsic::fmuladd: {
     EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
Index: llvm/lib/IR/Verifier.cpp
===================================================================
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -4738,6 +4738,21 @@
            "an array");
     break;
   }
+  case Intrinsic::fptrunc_round: {
+    // Check the rounding mode
+    Metadata *MD = nullptr;
+    auto *MAV = dyn_cast<MetadataAsValue>(Call.getOperand(1));
+    if (MAV)
+      MD = MAV->getMetadata();
+
+    Assert(MD != nullptr, "missing rounding mode argument", Call);
+
+    Optional<RoundingMode> RoundMode =
+        convertStrToRoundingMode(cast<MDString>(MD)->getString());
+    Assert(RoundMode.hasValue() && RoundMode != RoundingMode::Dynamic,
+           "unsupported rounding mode argument", Call);
+    break;
+  }
 #define INSTRUCTION(NAME, NARGS, ROUND_MODE, INTRINSIC)                        \
   case Intrinsic::INTRINSIC:
 #include "llvm/IR/ConstrainedOps.def"
Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -220,6 +220,9 @@
 def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
 def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
 
+def : GINodeEquiv<G_FPTRUNC_ROUND_UPWARD, SIfptrunc_round_upward>;
+def : GINodeEquiv<G_FPTRUNC_ROUND_DOWNWARD, SIfptrunc_round_downward>;
+
 class GISelSop2Pat <
   SDPatternOperator node,
   Instruction inst,
Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -483,6 +483,9 @@
   CONST_DATA_PTR,
   PC_ADD_REL_OFFSET,
   LDS,
+  FPTRUNC_ROUND_UPWARD,
+  FPTRUNC_ROUND_DOWNWARD,
+
   DUMMY_CHAIN,
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
   LOAD_D16_HI,
Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4445,6 +4445,8 @@
   NODE_NAME_CASE(CONST_DATA_PTR)
   NODE_NAME_CASE(PC_ADD_REL_OFFSET)
   NODE_NAME_CASE(LDS)
+  NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD)
+  NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD)
   NODE_NAME_CASE(DUMMY_CHAIN)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
   NODE_NAME_CASE(LOAD_D16_HI)
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -170,6 +170,8 @@
   bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
 
+  bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
+
   bool legalizeImageIntrinsic(
       MachineInstr &MI, MachineIRBuilder &B,
       GISelChangeObserver &Observer,
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -839,6 +839,11 @@
     .scalarize(0)
     .lower();
 
+  getActionDefinitionsBuilder(G_INTRINSIC_FPTRUNC_ROUND)
+      .customFor({S16, S32})
+      .scalarize(0)
+      .lower();
+
   // Lower roundeven into G_FRINT
   getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
     .scalarize(0)
@@ -1771,6 +1776,8 @@
   case TargetOpcode::G_CTLZ:
   case TargetOpcode::G_CTTZ:
     return legalizeCTLZ_CTTZ(MI, MRI, B);
+  case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
+    return legalizeFPTruncRound(MI, B);
   default:
     return false;
   }
@@ -4918,6 +4925,27 @@
   return true;
 }
 
+bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI,
+                                               MachineIRBuilder &B) const {
+  unsigned Opc;
+  int RoundMode = MI.getOperand(2).getImm();
+
+  if (RoundMode == (int)RoundingMode::TowardPositive)
+    Opc = AMDGPU::G_FPTRUNC_ROUND_UPWARD;
+  else if (RoundMode == (int)RoundingMode::TowardNegative)
+    Opc = AMDGPU::G_FPTRUNC_ROUND_DOWNWARD;
+  else
+    return false;
+
+  B.buildInstr(Opc)
+      .addDef(MI.getOperand(0).getReg())
+      .addUse(MI.getOperand(1).getReg());
+
+  MI.eraseFromParent();
+
+  return true;
+}
+
 bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                             MachineInstr &MI) const {
   MachineIRBuilder &B = Helper.MIRBuilder;
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4594,6 +4594,9 @@
     OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
     break;
   }
+  case AMDGPU::G_FPTRUNC_ROUND_UPWARD:
+  case AMDGPU::G_FPTRUNC_ROUND_DOWNWARD:
+    return getDefaultMappingVOP(MI);
   }
 
   return getInstructionMapping(/*ID*/1, /*Cost*/1,
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17,6 +17,7 @@
 #include "AMDGPUTargetMachine.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
+#include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -602,6 +603,7 @@
     setOperationAction(ISD::SINT_TO_FP, MVT::f16, Promote);
     setOperationAction(ISD::UINT_TO_FP, MVT::f16, Promote);
     setOperationAction(ISD::FROUND, MVT::f16, Custom);
+    setOperationAction(ISD::FPTRUNC_ROUND, MVT::f16, Custom);
 
     // F16 - VOP2 Actions.
     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
@@ -4653,6 +4655,21 @@
     return lowerBUILD_VECTOR(Op, DAG);
   case ISD::FP_ROUND:
     return lowerFP_ROUND(Op, DAG);
+  case ISD::FPTRUNC_ROUND: {
+    unsigned Opc;
+    SDLoc DL(Op);
+
+    // Get the rounding mode from the last operand
+    int RoundMode = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+    if (RoundMode == (int)RoundingMode::TowardPositive)
+      Opc = AMDGPUISD::FPTRUNC_ROUND_UPWARD;
+    else if (RoundMode == (int)RoundingMode::TowardNegative)
+      Opc = AMDGPUISD::FPTRUNC_ROUND_DOWNWARD;
+    else
+      return SDValue();
+
+    return DAG.getNode(Opc, DL, Op.getNode()->getVTList(), Op->getOperand(0));
+  }
   case ISD::TRAP:
     return lowerTRAP(Op, DAG);
   case ISD::DEBUGTRAP:
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -255,6 +255,14 @@
   [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
 >;
 
+def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
+  SDTFPRoundOp
+>;
+
+def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
+  SDTFPRoundOp
+>;
+
 //===----------------------------------------------------------------------===//
 // ValueType helpers
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -176,6 +176,22 @@
   let mayStore = 0;
 }
 
+// Pseudo instructions used for @llvm.fptrunc.round upward
+// and @llvm.fptrunc.round downward.
+// These intrinsics will be legalized to G_FPTRUNC_ROUND_UPWARD
+// and G_FPTRUNC_ROUND_DOWNWARD before being lowered to
+// FPTRUNC_UPWARD_PSEUDO and FPTRUNC_DOWNWARD_PSEUDO.
+// The final codegen is done in the ModeRegister pass.
+let Uses = [MODE, EXEC] in {
+def FPTRUNC_UPWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
+  (ins VGPR_32:$src0),
+  [(set f16:$vdst, (SIfptrunc_round_upward f32:$src0))]>;
+
+def FPTRUNC_DOWNWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
+  (ins VGPR_32:$src0),
+  [(set f16:$vdst, (SIfptrunc_round_downward f32:$src0))]>;
+} // End Uses = [MODE, EXEC]
+
 // Invert the exec mask and overwrite the inactive lanes of dst with inactive,
 // restoring it after we're done.
 let Defs = [SCC] in {
@@ -3076,3 +3092,15 @@
   // TODO: Should really base this on the call target
   let isConvergent = 1;
 }
+
+def G_FPTRUNC_ROUND_UPWARD : AMDGPUGenericInstruction {
+  let OutOperandList = (outs type0:$vdst);
+  let InOperandList = (ins type1:$src0);
+  let hasSideEffects = 0;
+}
+
+def G_FPTRUNC_ROUND_DOWNWARD : AMDGPUGenericInstruction {
+  let OutOperandList = (outs type0:$vdst);
+  let InOperandList = (ins type1:$src0);
+  let hasSideEffects = 0;
+}
Index: llvm/lib/Target/AMDGPU/SIModeRegister.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -162,7 +162,9 @@
 // double precision setting.
 Status SIModeRegister::getInstructionMode(MachineInstr &MI,
                                           const SIInstrInfo *TII) {
-  if (TII->usesFPDPRounding(MI)) {
+  if (TII->usesFPDPRounding(MI) ||
+      MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
+      MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
     switch (MI.getOpcode()) {
     case AMDGPU::V_INTERP_P1LL_F16:
     case AMDGPU::V_INTERP_P1LV_F16:
@@ -170,6 +172,18 @@
       // f16 interpolation instructions need double precision round to zero
       return Status(FP_ROUND_MODE_DP(3),
                     FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
+    case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
+      // Replace the pseudo with a real instruction
+      MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+      return Status(FP_ROUND_MODE_DP(3),
+                    FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_INF));
+    }
+    case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
+      // Replace the pseudo with a real instruction
+      MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+      return Status(FP_ROUND_MODE_DP(3),
+                    FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEGINF));
+    }
     default:
       return DefaultStatus;
     }
Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -131,6 +131,10 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 #
+# DEBUG-NEXT: G_INTRINSIC_FPTRUNC_ROUND (opcode 71): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+#
 # DEBUG-NEXT: G_INTRINSIC_TRUNC (opcode {{[0-9]+}}): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
Index: llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+
+define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+; CHECK-LABEL: test_fptrunc_round_upward:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT:    global_store_short v[6:7], v0, off
+; CHECK-NEXT:    s_endpgm
+  %res = call half @llvm.fptrunc.round(float %a, metadata !"round.upward")
+  store half %res, half addrspace(1)* %out, align 4
+  ret void
+}
+
+define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+; CHECK-LABEL: test_fptrunc_round_downward:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT:    global_store_short v[6:7], v0, off
+; CHECK-NEXT:    s_endpgm
+  %res = call half @llvm.fptrunc.round(float %a, metadata !"round.downward")
+  store half %res, half addrspace(1)* %out, align 4
+  ret void
+}
+
+define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %b, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+; CHECK-LABEL: test_fptrunc_round_upward_multiple_calls:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v2, v1
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; CHECK-NEXT:    v_add_f16_e32 v0, v0, v2
+; CHECK-NEXT:    v_add_f16_e32 v0, v1, v0
+; CHECK-NEXT:    global_store_short v[7:8], v0, off
+; CHECK-NEXT:    s_endpgm
+  %res1 = call half @llvm.fptrunc.round(float %a, metadata !"round.upward")
+  %res2 = call half @llvm.fptrunc.round(float %b, metadata !"round.upward")
+  %res3 = call half @llvm.fptrunc.round(float %b, metadata !"round.downward")
+  %res4 = fadd half %res1, %res2
+  %res5 = fadd half %res3, %res4
+  store half %res5, half addrspace(1)* %out, align 4
+  ret void
+}
+
+declare half @llvm.fptrunc.round(float, metadata)
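A minimal, self-contained IR sketch of how the new intrinsic is used, following the LangRef hunk above. This is illustrative only and not part of the patch; the explicit `.f16.f32` mangling suffix is one valid spelling for the overloaded intrinsic (the test file above uses the unmangled name, which the IR parser also accepts).

; Sketch: narrow two f32 values to f16 with explicit, opposite rounding modes.
define half @example_fptrunc_round(float %x, float %y) {
  ; Round toward +infinity while truncating %x to half.
  %hi = call half @llvm.fptrunc.round.f16.f32(float %x, metadata !"round.upward")
  ; Round toward -infinity while truncating %y to half.
  %lo = call half @llvm.fptrunc.round.f16.f32(float %y, metadata !"round.downward")
  %sum = fadd half %hi, %lo
  ret half %sum
}

declare half @llvm.fptrunc.round.f16.f32(float, metadata)

Note that "round.dynamic" is rejected by the verifier change above, and on AMDGPU only the upward and downward modes are custom-lowered (anything else falls back to SDValue()/false in the lowering hooks).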