This is an archive of the discontinued LLVM Phabricator instance.

Differential D85236

[CUDA] Work around a bug in rint() caused by a broken implementation provided by CUDA.
ClosedPublic

Authored by tra on Aug 4 2020, 12:05 PM.

Download Raw Diff

Details

Reviewers

jlebar

Commits

rG7d057efddc00: [CUDA] Work around a bug in rint/nearbyint caused by a broken implementation…

Summary

Normally, math functions are forwarded to the __nv_* counterparts provided by CUDA's
libdevice bitcode. However, the __nv_rint*() functions there have a bug -- they use
round(), which rounds halfway cases away from zero instead of rounding them to the
nearest even integer, so we end up with rint(2.5f) producing 3.0 instead of the
expected 2.0. The broken bitcode is not actually used by NVCC itself, which has a
work-around in the CUDA headers and, in recent versions, correct implementations in
its built-ins.

This patch implements an equivalent workaround and directs rint/rintf to
__builtin_rint/rintf, which produce correct results.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

tra created this revision.Aug 4 2020, 12:05 PM

Herald added a project: Restricted Project. · View Herald TranscriptAug 4 2020, 12:05 PM

Herald added subscribers: sanjoy.google, bixia, yaxunl. · View Herald Transcript

tra requested review of this revision.Aug 4 2020, 12:05 PM

LGTM, and can we write a test in the test-suite?

This revision is now accepted and ready to land.Aug 4 2020, 12:25 PM

Harbormaster completed remote builds in B66976: Diff 282992.Aug 4 2020, 12:53 PM

tra mentioned this in D85352: [CUDA, test-suite] More test cases for rint() and nearint().Aug 5 2020, 12:47 PM

Also fixed the same bug in nearbyint().

This revision was landed with ongoing or failed builds.Aug 5 2020, 1:14 PM

Closed by commit rG7d057efddc00: [CUDA] Work around a bug in rint/nearbyint caused by a broken implementation… (authored by tra). · Explain Why

This revision was automatically updated to reflect the committed changes.

tra added a commit: rG7d057efddc00: [CUDA] Work around a bug in rint/nearbyint caused by a broken implementation….

Harbormaster completed remote builds in B67177: Diff 283359.Aug 5 2020, 2:34 PM

Revision Contents

Path

Size

clang/

lib/

Headers/

__clang_cuda_math.h

5 lines

Diff 282992

clang/lib/Headers/__clang_cuda_math.h

/*===---- __clang_cuda_math.h - Device-side CUDA math support --------------===		/*===---- __clang_cuda_math.h - Device-side CUDA math support --------------===
*		*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.		* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*		*
*===-----------------------------------------------------------------------===		*===-----------------------------------------------------------------------===
*/		*/
#ifndef __CLANG_CUDA_MATH_H__		#ifndef __CLANG_CUDA_MATH_H__
#define __CLANG_CUDA_MATH_H__		#define __CLANG_CUDA_MATH_H__
#ifndef __CUDA__		#ifndef __CUDA__
#error "This file is for CUDA compilation only."		#error "This file is for CUDA compilation only."
		Lint: Pre-merge checks Inline Actions clang-tidy: error: "This file is for CUDA compilation only." [clang-diagnostic-error] not useful Lint: Pre-merge checks: clang-tidy: error: "This file is for CUDA compilation only." [clang-diagnostic-error] [[https…
#endif		#endif

#ifndef __OPENMP_NVPTX__		#ifndef __OPENMP_NVPTX__
#if CUDA_VERSION < 9000		#if CUDA_VERSION < 9000
#error This file is intended to be used with CUDA-9+ only.		#error This file is intended to be used with CUDA-9+ only.
		Lint: Pre-merge checks Inline Actions clang-tidy: error: This file is intended to be used with CUDA-9+ only. [clang-diagnostic-error] not useful Lint: Pre-merge checks: clang-tidy: error: This file is intended to be used with CUDA-9+ only. [clang-diagnostic-error]…
#endif		#endif
#endif		#endif

// __DEVICE__ is a helper macro with common set of attributes for the wrappers		// __DEVICE__ is a helper macro with common set of attributes for the wrappers
// we implement in this file. We need static in order to avoid emitting unused		// we implement in this file. We need static in order to avoid emitting unused
// functions and __forceinline__ helps inlining these wrappers at -O1.		// functions and __forceinline__ helps inlining these wrappers at -O1.
#pragma push_macro("__DEVICE__")		#pragma push_macro("__DEVICE__")
#ifdef __OPENMP_NVPTX__		#ifdef __OPENMP_NVPTX__
Show All 22 Lines
// -ffast-math or -fcuda-approx-transcendentals are in effect.		// -ffast-math or -fcuda-approx-transcendentals are in effect.
#pragma push_macro("__FAST_OR_SLOW")		#pragma push_macro("__FAST_OR_SLOW")
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)		#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
#define __FAST_OR_SLOW(fast, slow) fast		#define __FAST_OR_SLOW(fast, slow) fast
#else		#else
#define __FAST_OR_SLOW(fast, slow) slow		#define __FAST_OR_SLOW(fast, slow) slow
#endif		#endif

__DEVICE__ int abs(int __a) { return __nv_abs(__a); }		__DEVICE__ int abs(int __a) { return __nv_abs(__a); }
		Lint: Pre-merge checks Inline Actions clang-tidy: error: unknown type name 'device' [clang-diagnostic-error] not useful clang-tidy: error: expected ';' after top level declarator [clang-diagnostic-error] not useful clang-tidy: error: use of undeclared identifier '__nv_abs' [clang-diagnostic-error] not useful Lint: Pre-merge checks: clang-tidy: error: unknown type name '__device__' [clang-diagnostic-error] [[https://github.
__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }		__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }
		Lint: Pre-merge checks Inline Actions clang-tidy: error: unknown type name 'device' [clang-diagnostic-error] not useful clang-tidy: error: expected ';' after top level declarator [clang-diagnostic-error] not useful clang-tidy: error: use of undeclared identifier '__nv_fabs' [clang-diagnostic-error] not useful Lint: Pre-merge checks: clang-tidy: error: unknown type name '__device__' [clang-diagnostic-error] [[https://github.
__DEVICE__ double acos(double __a) { return __nv_acos(__a); }		__DEVICE__ double acos(double __a) { return __nv_acos(__a); }
		Lint: Pre-merge checks Inline Actions clang-tidy: error: unknown type name 'device' [clang-diagnostic-error] not useful clang-tidy: error: expected ';' after top level declarator [clang-diagnostic-error] not useful clang-tidy: error: use of undeclared identifier '__nv_acos' [clang-diagnostic-error] not useful Lint: Pre-merge checks: clang-tidy: error: unknown type name '__device__' [clang-diagnostic-error] [[https://github.
__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }		__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }
		Lint: Pre-merge checks Inline Actions clang-tidy: error: unknown type name 'device' [clang-diagnostic-error] not useful clang-tidy: error: expected ';' after top level declarator [clang-diagnostic-error] not useful clang-tidy: error: use of undeclared identifier '__nv_acosf' [clang-diagnostic-error] not useful Lint: Pre-merge checks: clang-tidy: error: unknown type name '__device__' [clang-diagnostic-error] [[https://github.
__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }		__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }
		Lint: Pre-merge checks Inline Actions clang-tidy: error: unknown type name 'device' [clang-diagnostic-error] not useful clang-tidy: error: expected ';' after top level declarator [clang-diagnostic-error] not useful clang-tidy: error: use of undeclared identifier '__nv_acosh' [clang-diagnostic-error] not useful Lint: Pre-merge checks: clang-tidy: error: unknown type name '__device__' [clang-diagnostic-error] [[https://github.
__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); }		__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); }
		Lint: Pre-merge checks Inline Actions clang-tidy: error: unknown type name 'device' [clang-diagnostic-error] not useful clang-tidy: error: expected ';' after top level declarator [clang-diagnostic-error] not useful Lint: Pre-merge checks: clang-tidy: error: unknown type name '__device__' [clang-diagnostic-error] [[https://github.
__DEVICE__ double asin(double __a) { return __nv_asin(__a); }		__DEVICE__ double asin(double __a) { return __nv_asin(__a); }
__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); }		__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); }
__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); }		__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); }
__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); }		__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); }
__DEVICE__ double atan(double __a) { return __nv_atan(__a); }		__DEVICE__ double atan(double __a) { return __nv_atan(__a); }
__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }		__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }
__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }		__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }
__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); }		__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); }
▲ Show 20 Lines • Show All 174 Lines • ▼ Show 20 Lines	__DEVICE__ float remquof(float __a, float __b, int *__c) {
return __nv_remquof(__a, __b, __c);		return __nv_remquof(__a, __b, __c);
}		}
__DEVICE__ double rhypot(double __a, double __b) {		__DEVICE__ double rhypot(double __a, double __b) {
return __nv_rhypot(__a, __b);		return __nv_rhypot(__a, __b);
}		}
__DEVICE__ float rhypotf(float __a, float __b) {		__DEVICE__ float rhypotf(float __a, float __b) {
return __nv_rhypotf(__a, __b);		return __nv_rhypotf(__a, __b);
}		}
__DEVICE__ double rint(double __a) { return __nv_rint(__a); }		// __nv_rint* in libdevice is buggy and produces incorrect results.
__DEVICE__ float rintf(float __a) { return __nv_rintf(__a); }		__DEVICE__ double rint(double __a) { return __builtin_rint(__a); }
		__DEVICE__ float rintf(float __a) { return __builtin_rintf(__a); }
__DEVICE__ double rnorm(int __a, const double *__b) {		__DEVICE__ double rnorm(int __a, const double *__b) {
return __nv_rnorm(__a, __b);		return __nv_rnorm(__a, __b);
}		}
__DEVICE__ double rnorm3d(double __a, double __b, double __c) {		__DEVICE__ double rnorm3d(double __a, double __b, double __c) {
return __nv_rnorm3d(__a, __b, __c);		return __nv_rnorm3d(__a, __b, __c);
}		}
__DEVICE__ float rnorm3df(float __a, float __b, float __c) {		__DEVICE__ float rnorm3df(float __a, float __b, float __c) {
return __nv_rnorm3df(__a, __b, __c);		return __nv_rnorm3df(__a, __b, __c);
▲ Show 20 Lines • Show All 86 Lines • Show Last 20 Lines