This is an archive of the discontinued LLVM Phabricator instance.

Differential D85236

[CUDA] Work around a bug in rint() caused by a broken implementation provided by CUDA.
ClosedPublic

Authored by tra on Aug 4 2020, 12:05 PM.

Download Raw Diff

Details

Reviewers

jlebar

Commits

rG7d057efddc00: [CUDA] Work around a bug in rint/nearbyint caused by a broken implementation…

Summary

Normally math functions are forwarded to the __nv_* counterparts provided by CUDA's
libdevice bitcode. However, the __nv_rint*() functions there have a bug -- they use
round(), which rounds halfway cases away from zero instead of to the nearest even
integer, so we end up with rint(2.5f) producing 3.0 instead of the expected 2.0. The
broken bitcode is not actually used by NVCC itself, which has both a work-around in
CUDA headers and, in recent versions, uses correct implementations in NVCC's built-ins.

This patch implements an equivalent workaround and directs rint/rintf to
__builtin_rint/__builtin_rintf, which produce correct results.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

tra created this revision. Aug 4 2020, 12:05 PM

Herald added a project: Restricted Project. · View Herald Transcript · Aug 4 2020, 12:05 PM

Herald added subscribers: sanjoy.google, bixia, yaxunl. · View Herald Transcript

tra requested review of this revision. Aug 4 2020, 12:05 PM

LGTM, and can we write a test in the test-suite?

This revision is now accepted and ready to land. Aug 4 2020, 12:25 PM

Harbormaster completed remote builds in B66976: Diff 282992. Aug 4 2020, 12:53 PM

tra mentioned this in D85352: [CUDA, test-suite] More test cases for rint() and nearint(). Aug 5 2020, 12:47 PM

Also fixed the same bug in nearbyint().

This revision was landed with ongoing or failed builds. Aug 5 2020, 1:14 PM

Closed by commit rG7d057efddc00: [CUDA] Work around a bug in rint/nearbyint caused by a broken implementation… (authored by tra). · Explain Why

This revision was automatically updated to reflect the committed changes.

tra added a commit: rG7d057efddc00: [CUDA] Work around a bug in rint/nearbyint caused by a broken implementation….

Harbormaster completed remote builds in B67177: Diff 283359. Aug 5 2020, 2:34 PM

Revision Contents

Path

Size

clang/

lib/

Headers/

__clang_cuda_math.h

9 lines

Diff 283363

clang/lib/Headers/__clang_cuda_math.h

Show First 20 Lines • Show All 189 Lines • ▼ Show 20 Lines
__DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); }		__DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); }
__DEVICE__ long lround(double __a) { return round(__a); }		__DEVICE__ long lround(double __a) { return round(__a); }
__DEVICE__ long lroundf(float __a) { return roundf(__a); }		__DEVICE__ long lroundf(float __a) { return roundf(__a); }
#endif		#endif
__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }		__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }
__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }		__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }
__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }		__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }
__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }		__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }
__DEVICE__ double nearbyint(double __a) { return __nv_nearbyint(__a); }		__DEVICE__ double nearbyint(double __a) { return __builtin_nearbyint(__a); }
__DEVICE__ float nearbyintf(float __a) { return __nv_nearbyintf(__a); }		__DEVICE__ float nearbyintf(float __a) { return __builtin_nearbyintf(__a); }
__DEVICE__ double nextafter(double __a, double __b) {		__DEVICE__ double nextafter(double __a, double __b) {
return __nv_nextafter(__a, __b);		return __nv_nextafter(__a, __b);
}		}
__DEVICE__ float nextafterf(float __a, float __b) {		__DEVICE__ float nextafterf(float __a, float __b) {
return __nv_nextafterf(__a, __b);		return __nv_nextafterf(__a, __b);
}		}
__DEVICE__ double norm(int __dim, const double *__t) {		__DEVICE__ double norm(int __dim, const double *__t) {
return __nv_norm(__dim, __t);		return __nv_norm(__dim, __t);
Show All 36 Lines	__DEVICE__ float remquof(float __a, float __b, int *__c) {
return __nv_remquof(__a, __b, __c);		return __nv_remquof(__a, __b, __c);
}		}
__DEVICE__ double rhypot(double __a, double __b) {		__DEVICE__ double rhypot(double __a, double __b) {
return __nv_rhypot(__a, __b);		return __nv_rhypot(__a, __b);
}		}
__DEVICE__ float rhypotf(float __a, float __b) {		__DEVICE__ float rhypotf(float __a, float __b) {
return __nv_rhypotf(__a, __b);		return __nv_rhypotf(__a, __b);
}		}
__DEVICE__ double rint(double __a) { return __nv_rint(__a); }		// __nv_rint* in libdevice is buggy and produces incorrect results.
__DEVICE__ float rintf(float __a) { return __nv_rintf(__a); }		__DEVICE__ double rint(double __a) { return __builtin_rint(__a); }
		__DEVICE__ float rintf(float __a) { return __builtin_rintf(__a); }
__DEVICE__ double rnorm(int __a, const double *__b) {		__DEVICE__ double rnorm(int __a, const double *__b) {
return __nv_rnorm(__a, __b);		return __nv_rnorm(__a, __b);
}		}
__DEVICE__ double rnorm3d(double __a, double __b, double __c) {		__DEVICE__ double rnorm3d(double __a, double __b, double __c) {
return __nv_rnorm3d(__a, __b, __c);		return __nv_rnorm3d(__a, __b, __c);
}		}
__DEVICE__ float rnorm3df(float __a, float __b, float __c) {		__DEVICE__ float rnorm3df(float __a, float __b, float __c) {
return __nv_rnorm3df(__a, __b, __c);		return __nv_rnorm3df(__a, __b, __c);
▲ Show 20 Lines • Show All 86 Lines • Show Last 20 Lines