Index: lib/Target/R600/AMDGPUISelLowering.h =================================================================== --- lib/Target/R600/AMDGPUISelLowering.h +++ lib/Target/R600/AMDGPUISelLowering.h @@ -44,6 +44,7 @@ /// \returns The resulting chain. SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; protected: Index: lib/Target/R600/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/R600/AMDGPUISelLowering.cpp +++ lib/Target/R600/AMDGPUISelLowering.cpp @@ -216,6 +216,10 @@ setOperationAction(ISD::FREM, MVT::f32, Custom); setOperationAction(ISD::FREM, MVT::f64, Custom); + if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { + setOperationAction(ISD::FRINT, MVT::f64, Custom); + } + if (!Subtarget->hasBFI()) { // fcopysign can be done in a single instruction with BFI. 
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
@@ -410,6 +414,7 @@
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
   case ISD::FREM: return LowerFREM(Op, DAG);
+  case ISD::FRINT: return LowerFRINT(Op, DAG);
   case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
   }
   return Op;
@@ -1240,6 +1245,43 @@
   return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
 }
 
+/// Custom-lower an f64 ISD::FRINT into FADD/FSUB plus a select.
+///
+/// Adding and then subtracting 2^52 carrying the sign of the input makes the
+/// FADD discard the fraction bits, because doubles of magnitude >= 2^52 have
+/// no fractional precision. Inputs whose magnitude is already above
+/// 2^52 - 0.5 are integral and are returned unchanged.
+///
+/// \returns The rounded value as an f64.
+SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  SDValue Src = Op.getOperand(0);
+
+  assert(Op.getValueType() == MVT::f64 && "FRINT is only custom for f64");
+
+  // Spell the constants as strings: hexadecimal floating-point literals are
+  // not accepted by every host compiler that builds LLVM.
+  APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
+  SDValue C1 = DAG.getConstantFP(C1Val, MVT::f64);
+  SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
+
+  // (Src + copysign(2^52, Src)) - copysign(2^52, Src) drops the fraction.
+  SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
+  SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
+
+  SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
+
+  // Largest double below 2^52, i.e. 2^52 - 0.5.
+  APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
+  SDValue C2 = DAG.getConstantFP(C2Val, MVT::f64);
+
+  // Anything larger in magnitude is already integral; keep it as is.
+  EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+  SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
+
+  return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
+}
+
 SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
                                                SelectionDAG &DAG) const {
   SDValue S0 = Op.getOperand(0);
Index: test/CodeGen/R600/llvm.rint.f64.ll
===================================================================
--- test/CodeGen/R600/llvm.rint.f64.ll
+++ test/CodeGen/R600/llvm.rint.f64.ll
@@ -1,30 +1,38 @@
 ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @f64
+; FUNC-LABEL: @rint_f64
 ; CI: V_RNDNE_F64_e32
-define void @f64(double addrspace(1)* %out, double %in) {
+
+; SI-DAG: V_ADD_F64
+; SI-DAG: V_ADD_F64
+; SI-DAG: V_CMP_GT_F64_e64
+; SI: V_CNDMASK_B32
+; SI: V_CNDMASK_B32
+; SI: S_ENDPGM
+define void @rint_f64(double addrspace(1)* %out, double %in) { entry: %0 = call double @llvm.rint.f64(double %in) store double %0, double addrspace(1)* %out ret void } -; FUNC-LABEL: @v2f64 +; FUNC-LABEL: @rint_v2f64 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 -define void @v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { +define void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { entry: %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in) store <2 x double> %0, <2 x double> addrspace(1)* %out ret void } -; FUNC-LABEL: @v4f64 +; FUNC-LABEL: @rint_v4f64 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 -define void @v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { +define void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { entry: %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in) store <4 x double> %0, <4 x double> addrspace(1)* %out