diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -50,6 +50,9 @@ #include #include #include +#if __has_include("hip/hip_version.h") +#include "hip/hip_version.h" +#endif // __has_include("hip/hip_version.h") #else typedef __SIZE_TYPE__ size_t; // Define macros which are needed to declare HIP device API's without standard @@ -74,25 +77,35 @@ extern "C" { #endif //__cplusplus +#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405 +extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size); +extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr); +__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { + return (void *) __ockl_dm_alloc(__size); +} +__attribute__((weak)) inline __device__ void free(void *__ptr) { + __ockl_dm_dealloc((unsigned long long)__ptr); +} +#else // HIP version check #if __HIP_ENABLE_DEVICE_MALLOC__ __device__ void *__hip_malloc(__hip_size_t __size); __device__ void *__hip_free(void *__ptr); __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { return __hip_malloc(__size); } -__attribute__((weak)) inline __device__ void *free(void *__ptr) { - return __hip_free(__ptr); +__attribute__((weak)) inline __device__ void free(void *__ptr) { + __hip_free(__ptr); } #else __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) { __builtin_trap(); return (void *)0; } -__attribute__((weak)) inline __device__ void *free(void *__ptr) { +__attribute__((weak)) inline __device__ void free(void *__ptr) { __builtin_trap(); - return (void *)0; } #endif +#endif // HIP version check #ifdef __cplusplus } // extern "C" diff --git a/clang/test/Headers/hip-header.hip b/clang/test/Headers/hip-header.hip --- a/clang/test/Headers/hip-header.hip +++ b/clang/test/Headers/hip-header.hip @@ -4,7 +4,7 @@ // RUN: -internal-isystem %S/Inputs/include \ // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ -// RUN: -D__HIPCC_RTC__ | FileCheck %s +// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,NOMALLOC %s // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ // RUN: -internal-isystem %S/Inputs/include \ @@ -25,6 +25,13 @@ // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ // RUN: -D__HIPCC_RTC__ -std=c++14 | FileCheck -check-prefixes=CHECK,CXX14 %s +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ +// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ +// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \ +// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s // expected-no-diagnostics @@ -120,14 +127,18 @@ #include // CHECK-LABEL: define{{.*}}@_Z11test_malloc // CHECK: call {{.*}}i8* @malloc(i64 -// CHECK: define weak {{.*}}i8* @malloc(i64 +// CHECK-LABEL: define weak {{.*}}i8* @malloc(i64 +// MALLOC: call i64 @__ockl_dm_alloc +// NOMALLOC: call void @llvm.trap __device__ void test_malloc(void *a) { a = malloc(42); } // CHECK-LABEL: define{{.*}}@_Z9test_free -// CHECK: call {{.*}}i8* @free(i8* -// CHECK: define weak {{.*}}i8* @free(i8* +// CHECK: call {{.*}}void @free(i8* +// CHECK-LABEL: define weak {{.*}}void @free(i8* +// MALLOC: call void @__ockl_dm_dealloc +// NOMALLOC: call void @llvm.trap __device__ void test_free(void *a) { free(a); }