diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -302,7 +302,7 @@ setOperationAction({ISD::FLOG, ISD::FLOG10, ISD::FEXP}, MVT::f32, Custom); - setOperationAction(ISD::FNEARBYINT, {MVT::f32, MVT::f64}, Custom); + setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom); setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom); diff --git a/llvm/test/CodeGen/AMDGPU/fnearbyint.ll b/llvm/test/CodeGen/AMDGPU/fnearbyint.ll --- a/llvm/test/CodeGen/AMDGPU/fnearbyint.ll +++ b/llvm/test/CodeGen/AMDGPU/fnearbyint.ll @@ -5,6 +5,7 @@ ; This should have the exactly the same output as the test for rint, ; so no need to check anything. +declare half @llvm.nearbyint.f16(half) #0 declare float @llvm.nearbyint.f32(float) #0 declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #0 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #0 @@ -13,6 +14,12 @@ declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) #0 +define amdgpu_kernel void @fnearbyint_f16(half addrspace(1)* %out, half %in) #1 { + %1 = call half @llvm.nearbyint.f16(half %in) + store half %1, half addrspace(1)* %out + ret void +} + define amdgpu_kernel void @fnearbyint_f32(float addrspace(1)* %out, float %in) #1 { entry: %0 = call float @llvm.nearbyint.f32(float %in)