diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -24049,3 +24049,43 @@
 
 The '``llvm.preserve.struct.access.index``' intrinsic produces the same result
 as a getelementptr with base ``base`` and access operands ``{0, gep_index}``.
+
+'``llvm.fptrunc.round``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <ty2>
+      @llvm.fptrunc.round(<type> <value>, metadata <rounding mode>)
+
+Overview:
+"""""""""
+
+The '``llvm.fptrunc.round``' intrinsic truncates
+:ref:`floating-point <t_floating>` ``value`` to type ``ty2``
+with a specified rounding mode.
+
+Arguments:
+""""""""""
+
+The '``llvm.fptrunc.round``' intrinsic takes a :ref:`floating-point
+<t_floating>` value to cast and a :ref:`floating-point <t_floating>` type
+to cast it to. The value to cast must be larger in size than the result type.
+
+The second argument specifies the rounding mode as described in the constrained
+intrinsics section.
+For this intrinsic, the "round.dynamic" mode is not supported.
+
+Semantics:
+""""""""""
+
+The '``llvm.fptrunc.round``' intrinsic casts a ``value`` from a larger
+:ref:`floating-point <t_floating>` type to a smaller :ref:`floating-point
+<t_floating>` type.
+This intrinsic is assumed to execute in the default :ref:`floating-point
+environment <floatenv>` *except* for the rounding mode.
+This intrinsic is not supported on all targets. Some targets may not support
+all rounding modes.
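
For illustration, a minimal usage sketch of the syntax documented above (not part of the patch; the function name is hypothetical), truncating float to half under an explicit rounding mode:

    declare half @llvm.fptrunc.round(float, metadata)

    define half @round_toward_plus_inf(float %x) {
      ; Truncate %x to half, rounding toward +infinity rather than the
      ; default round-to-nearest-even.
      %r = call half @llvm.fptrunc.round(float %x, metadata !"round.upward")
      ret half %r
    }
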
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -462,6 +462,9 @@
   STRICT_FSETCC,
   STRICT_FSETCCS,
 
+  /// FPTRUNC_ROUND - This corresponds to the fptrunc_round intrinsic.
+  FPTRUNC_ROUND,
+
   /// FMA - Perform a * b + c with no intermediate rounding step.
   FMA,
 
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -909,6 +909,12 @@
 }
 
 // FIXME: Consider maybe adding intrinsics for sitofp, uitofp.
+
+// Truncate a floating-point number with a specific rounding mode.
+def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
+                                              [ llvm_anyfloat_ty, llvm_metadata_ty ],
+                                              [ IntrNoMem, IntrWillReturn ]>;
+
 //===------------------------- Expect Intrinsics --------------------------===//
 //
 def int_expect : DefaultAttrsIntrinsic<[llvm_anyint_ty],
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -322,6 +322,9 @@
 /// Generic freeze.
 HANDLE_TARGET_OPCODE(G_FREEZE)
 
+/// INTRINSIC fptrunc_round intrinsic.
+HANDLE_TARGET_OPCODE(G_INTRINSIC_FPTRUNC_ROUND)
+
 /// INTRINSIC trunc intrinsic.
 HANDLE_TARGET_OPCODE(G_INTRINSIC_TRUNC)
 
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -965,6 +965,12 @@
 //------------------------------------------------------------------------------
 // Opcodes for LLVM Intrinsics
 //------------------------------------------------------------------------------
+def G_INTRINSIC_FPTRUNC_ROUND : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$src1, i32imm:$round_mode);
+  let hasSideEffects = false;
+}
+
 def G_INTRINSIC_TRUNC : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins type0:$src1);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2252,6 +2252,23 @@
     Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
     return CLI->lowerCall(MIRBuilder, Info);
   }
+  case Intrinsic::fptrunc_round: {
+    unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+
+    // Convert the metadata argument to a constant integer.
+    Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
+    Optional<RoundingMode> RoundMode =
+        convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+    // Add the rounding mode as an integer immediate.
+    MIRBuilder
+        .buildInstr(TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND,
+                    {getOrCreateVReg(CI)},
+                    {getOrCreateVReg(*CI.getArgOperand(0))}, Flags)
+        .addImm((int)RoundMode.getValue());
+
+    return true;
+  }
 #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
   case Intrinsic::INTRINSIC:
 #include "llvm/IR/ConstrainedOps.def"
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6343,6 +6343,27 @@
 #include "llvm/IR/VPIntrinsics.def"
     visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
     return;
+  case Intrinsic::fptrunc_round: {
+    // Get the rounding-mode metadata argument and convert it to an integer.
+    Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
+    Optional<RoundingMode> RoundMode =
+        convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+    EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+    // Propagate fast-math-flags from IR to node(s).
+    SDNodeFlags Flags;
+    Flags.copyFMF(*cast<FPMathOperator>(&I));
+    SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+
+    SDValue Result =
+        DAG.getNode(ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)),
+                    DAG.getTargetConstant((int)RoundMode.getValue(), sdl,
+                                          TLI.getPointerTy(DAG.getDataLayout())));
+    setValue(&I, Result);
+
+    return;
+  }
   case Intrinsic::fmuladd: {
     EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4772,6 +4772,27 @@
            "an array");
     break;
   }
+  case Intrinsic::fptrunc_round: {
+    // Check the rounding mode.
+    Metadata *MD = nullptr;
+    auto *MAV = dyn_cast<MetadataAsValue>(Call.getOperand(1));
+    if (MAV)
+      MD = MAV->getMetadata();
+
+    Assert(MD != nullptr, "missing rounding mode argument", Call);
+
+    Assert(isa<MDString>(MD),
+           ("invalid value for llvm.fptrunc.round metadata operand"
+            " (the operand should be a string)"),
+           MD);
+
+    Optional<RoundingMode> RoundMode =
+        convertStrToRoundingMode(cast<MDString>(MD)->getString());
+    Assert(RoundMode.hasValue() &&
+               RoundMode.getValue() != RoundingMode::Dynamic,
+           "unsupported rounding mode argument", Call);
+    break;
+  }
 #define INSTRUCTION(NAME, NARGS, ROUND_MODE, INTRINSIC)                        \
   case Intrinsic::INTRINSIC:
 #include "llvm/IR/ConstrainedOps.def"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -222,6 +222,9 @@
 def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
 def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;
+def : GINodeEquiv<G_FPTRUNC_ROUND_UPWARD, SIfptrunc_round_upward>;
+def : GINodeEquiv<G_FPTRUNC_ROUND_DOWNWARD, SIfptrunc_round_downward>;
+
 class GISelSop2Pat <
   SDPatternOperator node,
   Instruction inst,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -483,6 +483,9 @@
   CONST_DATA_PTR,
   PC_ADD_REL_OFFSET,
   LDS,
+  FPTRUNC_ROUND_UPWARD,
+  FPTRUNC_ROUND_DOWNWARD,
+
   DUMMY_CHAIN,
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
   LOAD_D16_HI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4486,6 +4486,8 @@
   NODE_NAME_CASE(CONST_DATA_PTR)
   NODE_NAME_CASE(PC_ADD_REL_OFFSET)
   NODE_NAME_CASE(LDS)
+  NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD)
+  NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD)
   NODE_NAME_CASE(DUMMY_CHAIN)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
   NODE_NAME_CASE(LOAD_D16_HI)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -169,6 +169,8 @@
   bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
 
+  bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
+
   bool legalizeImageIntrinsic(
       MachineInstr &MI, MachineIRBuilder &B,
       GISelChangeObserver &Observer,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -839,6 +839,11 @@
     .scalarize(0)
     .lower();
 
+  getActionDefinitionsBuilder(G_INTRINSIC_FPTRUNC_ROUND)
+      .customFor({S16, S32})
+      .scalarize(0)
+      .lower();
+
   // Lower roundeven into G_FRINT
   getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
     .scalarize(0)
@@ -1759,6 +1764,8 @@
   case TargetOpcode::G_CTLZ:
   case TargetOpcode::G_CTTZ:
     return legalizeCTLZ_CTTZ(MI, MRI, B);
+  case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
+    return legalizeFPTruncRound(MI, B);
   default:
     return false;
   }
@@ -4963,6 +4970,27 @@
   return true;
 }
 
+bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI,
+                                               MachineIRBuilder &B) const {
+  unsigned Opc;
+  int RoundMode = MI.getOperand(2).getImm();
+
+  if (RoundMode == (int)RoundingMode::TowardPositive)
+    Opc = AMDGPU::G_FPTRUNC_ROUND_UPWARD;
+  else if (RoundMode == (int)RoundingMode::TowardNegative)
+    Opc = AMDGPU::G_FPTRUNC_ROUND_DOWNWARD;
+  else
+    return false;
+
+  B.buildInstr(Opc)
+      .addDef(MI.getOperand(0).getReg())
+      .addUse(MI.getOperand(1).getReg());
+
+  MI.eraseFromParent();
+
+  return true;
+}
+
 bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                             MachineInstr &MI) const {
   MachineIRBuilder &B = Helper.MIRBuilder;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4570,6 +4570,9 @@
     OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
     break;
   }
+  case AMDGPU::G_FPTRUNC_ROUND_UPWARD:
+  case AMDGPU::G_FPTRUNC_ROUND_DOWNWARD:
+    return getDefaultMappingVOP(MI);
   }
 
   return getInstructionMapping(/*ID*/1, /*Cost*/1,
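
An illustrative IR sketch of the legalizer hook above (mine, not from the patch): legalizeFPTruncRound maps only TowardPositive and TowardNegative to AMDGPU opcodes, so any other rounding mode takes the `return false` path and fails instruction selection:

    ; Legalizes to G_FPTRUNC_ROUND_UPWARD and selects:
    %up = call half @llvm.fptrunc.round(float %x, metadata !"round.upward")
    ; No AMDGPU pseudo exists for TowardZero, so selection fails:
    %tz = call half @llvm.fptrunc.round(float %x, metadata !"round.towardzero")
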
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17,6 +17,7 @@
 #include "AMDGPUTargetMachine.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
+#include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -602,6 +603,7 @@
     setOperationAction(ISD::SINT_TO_FP, MVT::f16, Promote);
     setOperationAction(ISD::UINT_TO_FP, MVT::f16, Promote);
     setOperationAction(ISD::FROUND, MVT::f16, Custom);
+    setOperationAction(ISD::FPTRUNC_ROUND, MVT::f16, Custom);
 
     // F16 - VOP2 Actions.
     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
@@ -4740,6 +4742,24 @@
     return lowerBUILD_VECTOR(Op, DAG);
   case ISD::FP_ROUND:
     return lowerFP_ROUND(Op, DAG);
+  case ISD::FPTRUNC_ROUND: {
+    unsigned Opc;
+    SDLoc DL(Op);
+
+    if (Op.getOperand(0)->getValueType(0) != MVT::f32)
+      return SDValue();
+
+    // Get the rounding mode from the last operand.
+    int RoundMode = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+    if (RoundMode == (int)RoundingMode::TowardPositive)
+      Opc = AMDGPUISD::FPTRUNC_ROUND_UPWARD;
+    else if (RoundMode == (int)RoundingMode::TowardNegative)
+      Opc = AMDGPUISD::FPTRUNC_ROUND_DOWNWARD;
+    else
+      return SDValue();
+
+    return DAG.getNode(Opc, DL, Op.getNode()->getVTList(), Op->getOperand(0));
+  }
   case ISD::TRAP:
     return lowerTRAP(Op, DAG);
   case ISD::DEBUGTRAP:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -255,6 +255,14 @@
   [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
 >;
 
+def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
+  SDTFPRoundOp
+>;
+
+def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
+  SDTFPRoundOp
+>;
+
 //===----------------------------------------------------------------------===//
 // ValueType helpers
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -186,6 +186,22 @@
   let mayStore = 0;
 }
 
+// Pseudo instructions used for @llvm.fptrunc.round upward
+// and @llvm.fptrunc.round downward.
+// These intrinsics will be legalized to G_FPTRUNC_ROUND_UPWARD
+// and G_FPTRUNC_ROUND_DOWNWARD before being lowered to
+// FPTRUNC_UPWARD_PSEUDO and FPTRUNC_DOWNWARD_PSEUDO.
+// The final codegen is done in the ModeRegister pass.
+let Uses = [MODE, EXEC] in {
+def FPTRUNC_UPWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
+  (ins VGPR_32:$src0),
+  [(set f16:$vdst, (SIfptrunc_round_upward f32:$src0))]>;
+
+def FPTRUNC_DOWNWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
+  (ins VGPR_32:$src0),
+  [(set f16:$vdst, (SIfptrunc_round_downward f32:$src0))]>;
+} // End Uses = [MODE, EXEC]
+
 // Invert the exec mask and overwrite the inactive lanes of dst with inactive,
 // restoring it after we're done.
 let Defs = [SCC] in {
@@ -3183,3 +3199,15 @@
   // TODO: Should really base this on the call target
   let isConvergent = 1;
 }
+
+def G_FPTRUNC_ROUND_UPWARD : AMDGPUGenericInstruction {
+  let OutOperandList = (outs type0:$vdst);
+  let InOperandList = (ins type1:$src0);
+  let hasSideEffects = 0;
+}
+
+def G_FPTRUNC_ROUND_DOWNWARD : AMDGPUGenericInstruction {
+  let OutOperandList = (outs type0:$vdst);
+  let InOperandList = (ins type1:$src0);
+  let hasSideEffects = 0;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -162,7 +162,9 @@
 // double precision setting.
 Status SIModeRegister::getInstructionMode(MachineInstr &MI,
                                           const SIInstrInfo *TII) {
-  if (TII->usesFPDPRounding(MI)) {
+  if (TII->usesFPDPRounding(MI) ||
+      MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
+      MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
     switch (MI.getOpcode()) {
     case AMDGPU::V_INTERP_P1LL_F16:
     case AMDGPU::V_INTERP_P1LV_F16:
@@ -170,6 +172,18 @@
       // f16 interpolation instructions need double precision round to zero
       return Status(FP_ROUND_MODE_DP(3),
                     FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
+    case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
+      // Replacing the pseudo by a real instruction
+      MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+      return Status(FP_ROUND_MODE_DP(3),
+                    FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_INF));
+    }
+    case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
+      // Replacing the pseudo by a real instruction
+      MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+      return Status(FP_ROUND_MODE_DP(3),
+                    FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEGINF));
+    }
     default:
       return DefaultStatus;
     }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -131,6 +131,10 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 #
+# DEBUG-NEXT: G_INTRINSIC_FPTRUNC_ROUND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+#
 # DEBUG-NEXT: G_INTRINSIC_TRUNC (opcode {{[0-9]+}}): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
diff --git a/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll
@@ -0,0 +1,11 @@
+; RUN: not --crash llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL
+; RUN: not --crash llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL
+
+define amdgpu_gs void @test_fptrunc_round_legalization(double %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+; FAIL: LLVM ERROR: Cannot select
+  %res = call half @llvm.fptrunc.round.f64(double %a, metadata !"round.upward")
+  store half %res, half addrspace(1)* %out, align 4
+  ret void
+}
+
+declare half @llvm.fptrunc.round.f64(double, metadata)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
+
+define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+; CHECK-LABEL: test_fptrunc_round_upward:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT:    global_store_short v[6:7], v0, off
+; CHECK-NEXT:    s_endpgm
+  %res = call half @llvm.fptrunc.round(float %a, metadata !"round.upward")
+  store half %res, half addrspace(1)* %out, align 4
+  ret void
+}
+
+define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+; CHECK-LABEL: test_fptrunc_round_downward:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT:    global_store_short v[6:7], v0, off
+; CHECK-NEXT:    s_endpgm
+  %res = call half @llvm.fptrunc.round(float %a, metadata !"round.downward")
+  store half %res, half addrspace(1)* %out, align 4
+  ret void
+}
+
+define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %b, i32 %data0, <4 x i32> %data1, half addrspace(1)* %out) {
+; CHECK-LABEL: test_fptrunc_round_upward_multiple_calls:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v2, v1
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
+; CHECK-NEXT:    v_add_f16_e32 v0, v0, v2
+; CHECK-NEXT:    v_add_f16_e32 v0, v1, v0
+; CHECK-NEXT:    global_store_short v[7:8], v0, off
+; CHECK-NEXT:    s_endpgm
+  %res1 = call half @llvm.fptrunc.round(float %a, metadata !"round.upward")
+  %res2 = call half @llvm.fptrunc.round(float %b, metadata !"round.upward")
+  %res3 = call half @llvm.fptrunc.round(float %b, metadata !"round.downward")
+  %res4 = fadd half %res1, %res2
+  %res5 = fadd half %res3, %res4
+  store half %res5, half addrspace(1)* %out, align 4
+  ret void
+}
+
+declare half @llvm.fptrunc.round(float, metadata)
void +} + +declare half @llvm.fptrunc.round(float, metadata) diff --git a/llvm/test/Verifier/llvm.fptrunc.round.ll b/llvm/test/Verifier/llvm.fptrunc.round.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Verifier/llvm.fptrunc.round.ll @@ -0,0 +1,13 @@ +; RUN: not opt -verify < %s 2>&1 | FileCheck %s + +declare half @llvm.fptrunc.round(float, metadata) + +define void @test_fptrunc_round_dynamic(float %a) { +; CHECK: unsupported rounding mode argument + %res = call half @llvm.fptrunc.round(float %a, metadata !"round.dynamic") +; CHECK: unsupported rounding mode argument + %res1 = call half @llvm.fptrunc.round(float %a, metadata !"round.test") +; CHECK: invalid value for llvm.fptrunc.round metadata operand (the operand should be a string) + %res2 = call half @llvm.fptrunc.round(float %a, metadata i32 5) + ret void +}