diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -47,28 +47,9 @@ #endif //__cplusplus #if !defined(__HIPCC_RTC__) -#include -#include -#include #if __has_include("hip/hip_version.h") #include "hip/hip_version.h" #endif // __has_include("hip/hip_version.h") -#else -typedef __SIZE_TYPE__ size_t; -// Define macros which are needed to declare HIP device API's without standard -// C/C++ headers. This is for readability so that these API's can be written -// the same way as non-hipRTC use case. These macros need to be popped so that -// they do not pollute users' name space. -#pragma push_macro("NULL") -#pragma push_macro("uint32_t") -#pragma push_macro("uint64_t") -#pragma push_macro("CHAR_BIT") -#pragma push_macro("INT_MAX") -#define NULL (void *)0 -#define uint32_t __UINT32_TYPE__ -#define uint64_t __UINT64_TYPE__ -#define CHAR_BIT __CHAR_BIT__ -#define INT_MAX __INTMAX_MAX__ #endif // __HIPCC_RTC__ typedef __SIZE_TYPE__ __hip_size_t; @@ -78,11 +59,13 @@ #endif //__cplusplus #if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405 -extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size); -extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr); +__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size); +__device__ void __ockl_dm_dealloc(unsigned long long __addr); #if __has_feature(address_sanitizer) -extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc); -extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc); +__device__ unsigned long long __asan_malloc_impl(unsigned long long __size, + unsigned long long __pc); +__device__ void __asan_free_impl(unsigned long long __addr, + unsigned long long __pc); __attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) { unsigned long long __pc = (unsigned long long)__builtin_return_address(0); return (void *)__asan_malloc_impl(__size, __pc); @@ -91,7 +74,7 @@ unsigned long long __pc = (unsigned long long)__builtin_return_address(0); __asan_free_impl((unsigned long long)__ptr, __pc); } -#else +#else // __has_feature(address_sanitizer) __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { return (void *) __ockl_dm_alloc(__size); } @@ -109,7 +92,7 @@ __attribute__((weak)) inline __device__ void free(void *__ptr) { __hip_free(__ptr); } -#else +#else // __HIP_ENABLE_DEVICE_MALLOC__ __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { __builtin_trap(); return (void *)0; @@ -117,13 +100,38 @@ __attribute__((weak)) inline __device__ void free(void *__ptr) { __builtin_trap(); } -#endif +#endif // __HIP_ENABLE_DEVICE_MALLOC__ #endif // HIP version check #ifdef __cplusplus } // extern "C" #endif //__cplusplus +#if !defined(__HIPCC_RTC__) +#include +#include +#include +#if __has_include("hip/hip_version.h") +#include "hip/hip_version.h" +#endif // __has_include("hip/hip_version.h") +#else +typedef __SIZE_TYPE__ size_t; +// Define macros which are needed to declare HIP device API's without standard +// C/C++ headers. This is for readability so that these API's can be written +// the same way as non-hipRTC use case. These macros need to be popped so that +// they do not pollute users' name space. +#pragma push_macro("NULL") +#pragma push_macro("uint32_t") +#pragma push_macro("uint64_t") +#pragma push_macro("CHAR_BIT") +#pragma push_macro("INT_MAX") +#define NULL (void *)0 +#define uint32_t __UINT32_TYPE__ +#define uint64_t __UINT64_TYPE__ +#define CHAR_BIT __CHAR_BIT__ +#define INT_MAX __INTMAX_MAX__ +#endif // __HIPCC_RTC__ + #include <__clang_hip_libdevice_declares.h> #include <__clang_hip_math.h> #include <__clang_hip_stdlib.h> diff --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib --- a/clang/test/Headers/Inputs/include/cstdlib +++ b/clang/test/Headers/Inputs/include/cstdlib @@ -26,5 +26,7 @@ float abs(float __x) { return fabs(__x); } double abs(double __x) { return fabs(__x); } +using ::malloc; +using ::free; } diff --git a/clang/test/Headers/Inputs/include/math.h b/clang/test/Headers/Inputs/include/math.h --- a/clang/test/Headers/Inputs/include/math.h +++ b/clang/test/Headers/Inputs/include/math.h @@ -105,8 +105,6 @@ long lrintf(float __a); long lround(double __a); long lroundf(float __a); -int max(int __a, int __b); -int min(int __a, int __b); double modf(double __a, double *__b); float modff(float __a, float *__b); double nearbyint(double __a); diff --git a/clang/test/Headers/Inputs/include/sstream b/clang/test/Headers/Inputs/include/sstream new file mode 100644 diff --git a/clang/test/Headers/Inputs/include/stdexcept b/clang/test/Headers/Inputs/include/stdexcept new file mode 100644 diff --git a/clang/test/Headers/hip-header.hip b/clang/test/Headers/hip-header.hip --- a/clang/test/Headers/hip-header.hip +++ b/clang/test/Headers/hip-header.hip @@ -31,7 +31,14 @@ // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ // RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \ -// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s +// RUN: -D__HIPCC_RTC__ -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ +// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ +// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \ +// RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ // RUN: -internal-isystem %S/Inputs/include \ @@ -40,6 +47,13 @@ // RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \ // RUN: -fsanitize=address -disable-llvm-passes -D__HIPCC_RTC__ \ // RUN: | FileCheck -check-prefixes=MALLOC-ASAN %s +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -aux-triple amdgcn-amd-amdhsa -triple x86_64-unknown-unknown \ +// RUN: -emit-llvm %s -o - \ +// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \ +// RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC-HOST %s // expected-no-diagnostics @@ -133,9 +147,10 @@ // Check that device malloc and free do not conflict with std headers. #include -// CHECK-LABEL: define{{.*}}@_Z11test_malloc -// CHECK: call {{.*}}ptr @malloc(i64 -// CHECK-LABEL: define weak {{.*}}ptr @malloc(i64 +// MALLOC-LABEL: define{{.*}}@_Z11test_malloc +// MALLOC: call {{.*}}ptr @malloc(i64 +// MALLOC: call {{.*}}ptr @malloc(i64 +// MALLOC-LABEL: define weak {{.*}}ptr @malloc(i64 // MALLOC: call i64 @__ockl_dm_alloc // NOMALLOC: call void @llvm.trap // MALLOC-ASAN-LABEL: define weak {{.*}}ptr @malloc(i64 @@ -143,11 +158,13 @@ // MALLOC-ASAN: call i64 @__asan_malloc_impl(i64 {{.*}}, i64 {{.*}}) __device__ void test_malloc(void *a) { a = malloc(42); + a = std::malloc(42); } -// CHECK-LABEL: define{{.*}}@_Z9test_free -// CHECK: call {{.*}}void @free(ptr -// CHECK-LABEL: define weak {{.*}}void @free(ptr +// MALLOC-LABEL: define{{.*}}@_Z9test_free +// MALLOC: call {{.*}}void @free(ptr +// MALLOC: call {{.*}}void @free(ptr +// MALLOC-LABEL: define weak {{.*}}void @free(ptr // MALLOC: call void @__ockl_dm_dealloc // NOMALLOC: call void @llvm.trap // MALLOC-ASAN-LABEL: define weak {{.*}}void @free(ptr @@ -155,4 +172,17 @@ // MALLOC-ASAN: call void @__asan_free_impl(i64 {{.*}}, i64 {{.*}}) __device__ void test_free(void *a) { free(a); + std::free(a); +} + +// MALLOC-HOST-LABEL: define{{.*}}@_Z16test_malloc_host +// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64 +// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr +// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64 +// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr +void test_malloc_host(void *a) { + a = malloc(42); + free(a); + a = std::malloc(42); + std::free(a); }