Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -1204,7 +1204,8 @@ // If we are offloading to a target via OpenMP we need to include the // openmp_wrappers folder which contains alternative system headers. if (JA.isDeviceOffloading(Action::OFK_OpenMP) && - getToolChain().getTriple().isNVPTX()){ + (getToolChain().getTriple().isNVPTX() || + getToolChain().getTriple().isAMDGCN())) { if (!Args.hasArg(options::OPT_nobuiltininc)) { // Add openmp_wrappers/* to our system include path. This lets us wrap // standard library headers. Index: clang/lib/Headers/CMakeLists.txt =================================================================== --- clang/lib/Headers/CMakeLists.txt +++ clang/lib/Headers/CMakeLists.txt @@ -160,6 +160,7 @@ openmp_wrappers/__clang_openmp_device_functions.h openmp_wrappers/complex_cmath.h openmp_wrappers/new + openmp_wrappers/hip/hip_runtime.h ) set(output_dir ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include) Index: clang/lib/Headers/__clang_cuda_complex_builtins.h =================================================================== --- clang/lib/Headers/__clang_cuda_complex_builtins.h +++ clang/lib/Headers/__clang_cuda_complex_builtins.h @@ -27,6 +27,7 @@ // different but equivalent function versions. TODO: For OpenMP we currently // select the native builtins as the overload support for templates is lacking. #if !defined(_OPENMP) +#ifdef __NVPTX__ #define _ISNANd std::isnan #define _ISNANf std::isnan #define _ISINFd std::isinf @@ -41,7 +42,29 @@ #define _ABSf std::abs #define _LOGBd std::logb #define _LOGBf std::logb +#define _fmaxd max +#define _fmaxf max #else +#include <__clang_hip_libdevice_declares.h> +#define _ISNANd __ocml_isnan_f64 +#define _ISNANf __ocml_isnan_f32 +#define _ISINFd __ocml_isinf_f64 +#define _ISINFf __ocml_isinf_f32 +#define _ISFINITEd __ocml_isfinite_f64 +#define _ISFINITEf __ocml_isfinite_f32 +#define _COPYSIGNd __ocml_copysign_f64 +#define _COPYSIGNf __ocml_copysign_f32 +#define _SCALBNd __ocml_scalbn_f64 +#define _SCALBNf __ocml_scalbn_f32 +#define _ABSd __ocml_fabs_f64 +#define _ABSf __ocml_fabs_f32 +#define _LOGBd __ocml_logb_f64 +#define _LOGBf __ocml_logb_f32 +#define _fmaxd __ocml_fmax_f64 +#define _fmaxf __ocml_fmax_f32 +#endif +#else // OPENMP +#ifdef __NVPTX__ #define _ISNANd __nv_isnand #define _ISNANf __nv_isnanf #define _ISINFd __nv_isinfd @@ -56,6 +79,28 @@ #define _ABSf __nv_fabsf #define _LOGBd __nv_logb #define _LOGBf __nv_logbf +#define _fmaxd __nv_fmax +#define _fmaxf __nv_fmaxf +#else +// OPENMP and __AMDGCN__ +#include <__clang_hip_libdevice_declares.h> +#define _ISNANd __ocml_isnan_f64 +#define _ISNANf __ocml_isnan_f32 +#define _ISINFd __ocml_isinf_f64 +#define _ISINFf __ocml_isinf_f32 +#define _ISFINITEd __ocml_isfinite_f64 +#define _ISFINITEf __ocml_isfinite_f32 +#define _COPYSIGNd __ocml_copysign_f64 +#define _COPYSIGNf __ocml_copysign_f32 +#define _SCALBNd __ocml_scalbn_f64 +#define _SCALBNf __ocml_scalbn_f32 +#define _ABSd __ocml_fabs_f64 +#define _ABSf __ocml_fabs_f32 +#define _LOGBd __ocml_logb_f64 +#define _LOGBf __ocml_logb_f32 +#define _fmaxd __ocml_fmax_f64 +#define _fmaxf __ocml_fmax_f32 +#endif #endif #if defined(__cplusplus) @@ -167,7 +212,7 @@ // Can't use std::max, because that's defined in , and we don't // want to pull that in for every compile. The CUDA headers define // ::max(float, float) and ::max(double, double), which is sufficient for us. - double __logbw = _LOGBd(max(_ABSd(__c), _ABSd(__d))); + double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d))); if (_ISFINITEd(__logbw)) { __ilogbw = (int)__logbw; __c = _SCALBNd(__c, -__ilogbw); @@ -200,7 +245,7 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { int __ilogbw = 0; - float __logbw = _LOGBf(max(_ABSf(__c), _ABSf(__d))); + float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d))); if (_ISFINITEf(__logbw)) { __ilogbw = (int)__logbw; __c = _SCALBNf(__c, -__ilogbw); @@ -249,6 +294,8 @@ #undef _ABSf #undef _LOGBd #undef _LOGBf +#undef _fmaxd +#undef _fmaxf #ifdef _OPENMP #pragma omp end declare target Index: clang/lib/Headers/__clang_hip_cmath.h =================================================================== --- clang/lib/Headers/__clang_hip_cmath.h +++ clang/lib/Headers/__clang_hip_cmath.h @@ -20,8 +20,15 @@ #include #include +// __DEVICE__ is a helper macro with common set of attributes for the wrappers +// we implement in this file. We need static in order to avoid emitting unused +// functions. #pragma push_macro("__DEVICE__") +#ifdef __OPENMP_AMDGCN__ +#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) +#else #define __DEVICE__ static __device__ inline __attribute__((always_inline)) +#endif // Start with functions that cannot be defined by DEF macros below. #if defined(__cplusplus) Index: clang/lib/Headers/__clang_hip_libdevice_declares.h =================================================================== --- clang/lib/Headers/__clang_hip_libdevice_declares.h +++ clang/lib/Headers/__clang_hip_libdevice_declares.h @@ -299,8 +299,13 @@ typedef _Float16 __2f16 __attribute__((ext_vector_type(2))); typedef short __2i16 __attribute__((ext_vector_type(2))); +#if defined(__cplusplus) __device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s); +#else +__device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, + float c, unsigned int s); +#endif __device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16); __device__ __2f16 __ocml_cos_2f16(__2f16); Index: clang/lib/Headers/__clang_hip_math.h =================================================================== --- clang/lib/Headers/__clang_hip_math.h +++ clang/lib/Headers/__clang_hip_math.h @@ -19,16 +19,36 @@ #include #include +// __DEVICE__ is a helper macro with common set of attributes for the wrappers +// we implement in this file. We need static in order to avoid emitting unused +// functions and __forceinline__ helps inlining these wrappers at -O1. #pragma push_macro("__DEVICE__") +#pragma push_macro("__DEVICE_NOCE__") + +#ifdef __OPENMP_AMDGCN__ +#if defined(__cplusplus) +#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) +#define __DEVICE_NOCE__ static __attribute__((always_inline, nothrow)) +#else // !defined(__cplusplus), c openmp compilation +#define __DEVICE__ static __attribute__((always_inline, nothrow)) +#define __DEVICE_NOCE__ __DEVICE__ +#endif +#else // !__OPENMP_AMDGCN__, so this is for HIP-Clang which is always C++. #define __DEVICE__ static __device__ inline __attribute__((always_inline)) +#define __DEVICE_NOCE__ __DEVICE__ +#endif // A few functions return bool type starting only in C++11. #pragma push_macro("__RETURN_TYPE") +#ifdef __OPENMP_AMDGCN__ +#define __RETURN_TYPE int +#else #if defined(__cplusplus) #define __RETURN_TYPE bool #else #define __RETURN_TYPE int #endif +#endif #if defined (__cplusplus) && __cplusplus < 201103L // emulate static_assert on type sizes @@ -39,7 +59,12 @@ static const bool valid; }; -__DEVICE__ +// All following c-capable function defs have one of two macro modifiers: +// __DEVICE__ +// __DEVICE_NOCE__ same as __DEVICE__ but no constexpr for those functions +// that cannot return constexpr in c++. + +__DEVICE_NOCE__ void __suppress_unused_warning(bool b){}; template __DEVICE__ void __static_assert_equal_size() { @@ -55,7 +80,7 @@ #endif -__DEVICE__ +__DEVICE_NOCE__ uint64_t __make_mantissa_base8(const char *__tagp) { uint64_t __r = 0; while (__tagp) { @@ -72,7 +97,7 @@ return __r; } -__DEVICE__ +__DEVICE_NOCE__ uint64_t __make_mantissa_base10(const char *__tagp) { uint64_t __r = 0; while (__tagp) { @@ -89,7 +114,7 @@ return __r; } -__DEVICE__ +__DEVICE_NOCE__ uint64_t __make_mantissa_base16(const char *__tagp) { uint64_t __r = 0; while (__tagp) { @@ -244,13 +269,17 @@ __DEVICE__ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } -__DEVICE__ +__DEVICE_NOCE__ float frexpf(float __x, int *__nptr) { +#ifdef __OPENMP_AMDGCN__ + static __attribute__((address_space(5))) int __tmp; + float __r = __ocml_frexp_f32(__x, &__tmp); +#else int __tmp; float __r = __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp); +#endif *__nptr = __tmp; - return __r; } @@ -275,7 +304,7 @@ __DEVICE__ float j1f(float __x) { return __ocml_j1_f32(__x); } -__DEVICE__ +__DEVICE_NOCE__ float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if @@ -329,15 +358,22 @@ __DEVICE__ long int lroundf(float __x) { return __ocml_round_f32(__x); } -__DEVICE__ +__DEVICE_NOCE__ float modff(float __x, float *__iptr) { +#ifdef __OPENMP_AMDGCN__ + static __attribute__((address_space(5))) float __tmp; + float __r = __ocml_modf_f32(__x, &__tmp); +#else float __tmp; float __r = __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); +#endif *__iptr = __tmp; return __r; } +// FIXME need a c version of nanf +#if defined(__cplusplus) __DEVICE__ float nanf(const char *__tagp) { union { @@ -358,6 +394,7 @@ return __tmp.val; } +#endif __DEVICE__ float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); } @@ -383,7 +420,7 @@ __DEVICE__ float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } -__DEVICE__ +__DEVICE_NOCE__ float normf(int __dim, const float *__a) { // TODO: placeholder until OCML adds support. float __r = 0; @@ -409,13 +446,17 @@ return __ocml_remainder_f32(__x, __y); } -__DEVICE__ +__DEVICE_NOCE__ float remquof(float __x, float __y, int *__quo) { +#ifdef __OPENMP_AMDGCN__ + static __attribute__((address_space(5))) int __tmp; + float __r = __ocml_remquo_f32( __x, __y, &__tmp); +#else int __tmp; float __r = __ocml_remquo_f32( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); +#endif *__quo = __tmp; - return __r; } @@ -435,7 +476,7 @@ return __ocml_rlen4_f32(__x, __y, __z, __w); } -__DEVICE__ +__DEVICE_NOCE__ float rnormf(int __dim, const float *__a) { // TODO: placeholder until OCML adds support. float __r = 0; @@ -465,19 +506,29 @@ __DEVICE__ __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); } -__DEVICE__ +__DEVICE_NOCE__ void sincosf(float __x, float *__sinptr, float *__cosptr) { +#ifdef __OPENMP_AMDGCN__ + static __attribute__((address_space(5))) float __tmp; + *__sinptr = __ocml_sincos_f32(__x, &__tmp); +#else float __tmp; *__sinptr = __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); +#endif *__cosptr = __tmp; } -__DEVICE__ +__DEVICE_NOCE__ void sincospif(float __x, float *__sinptr, float *__cosptr) { +#ifdef __OPENMP_AMDGCN__ + static __attribute__((address_space(5))) float __tmp; + *__sinptr = __ocml_sincospi_f32(__x, &__tmp); +#else float __tmp; *__sinptr = __ocml_sincospi_f32( __x, (__attribute__((address_space(5))) float *)&__tmp); +#endif *__cosptr = __tmp; } @@ -511,7 +562,7 @@ __DEVICE__ float y1f(float __x) { return __ocml_y1_f32(__x); } -__DEVICE__ +__DEVICE_NOCE__ float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if @@ -658,7 +709,7 @@ __DEVICE__ float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); } -__DEVICE__ +__DEVICE_NOCE__ void __sincosf(float __x, float *__sinptr, float *__cosptr) { *__sinptr = __ocml_native_sin_f32(__x); *__cosptr = __ocml_native_cos_f32(__x); @@ -770,11 +821,16 @@ __DEVICE__ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } -__DEVICE__ +__DEVICE_NOCE__ double frexp(double __x, int *__nptr) { +#ifdef __OPENMP_AMDGCN__ + static __attribute__((address_space(5))) int __tmp; + double __r = __ocml_frexp_f64(__x, &__tmp); +#else int __tmp; double __r = __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp); +#endif *__nptr = __tmp; return __r; } @@ -800,7 +856,7 @@ __DEVICE__ double j1(double __x) { return __ocml_j1_f64(__x); } -__DEVICE__ +__DEVICE_NOCE__ double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if @@ -854,16 +910,22 @@ __DEVICE__ long int lround(double __x) { return __ocml_round_f64(__x); } -__DEVICE__ +__DEVICE_NOCE__ double modf(double __x, double *__iptr) { +#ifdef __OPENMP_AMDGCN__ + static __attribute__((address_space(5))) double __tmp; + double __r = __ocml_modf_f64(__x, &__tmp); +#else double __tmp; double __r = __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); +#endif *__iptr = __tmp; - return __r; } +// FIXME need a c version of nan +#if defined(__cplusplus) __DEVICE__ double nan(const char *__tagp) { #if !_WIN32 @@ -891,6 +953,7 @@ return *reinterpret_cast(&__val); #endif } +#endif __DEVICE__ double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); } @@ -900,7 +963,7 @@ return __ocml_nextafter_f64(__x, __y); } -__DEVICE__ +__DEVICE_NOCE__ double norm(int __dim, const double *__a) { // TODO: placeholder until OCML adds support. double __r = 0; @@ -942,13 +1005,17 @@ return __ocml_remainder_f64(__x, __y); } -__DEVICE__ +__DEVICE_NOCE__ double remquo(double __x, double __y, int *__quo) { +#ifdef __OPENMP_AMDGCN__ + static __attribute__((address_space(5))) int __tmp; + double __r = __ocml_remquo_f64(__x, __y, &__tmp); +#else int __tmp; double __r = __ocml_remquo_f64( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); +#endif *__quo = __tmp; - return __r; } @@ -958,7 +1025,7 @@ __DEVICE__ double rint(double __x) { return __ocml_rint_f64(__x); } -__DEVICE__ +__DEVICE_NOCE__ double rnorm(int __dim, const double *__a) { // TODO: placeholder until OCML adds support. double __r = 0; @@ -1000,19 +1067,29 @@ __DEVICE__ double sin(double __x) { return __ocml_sin_f64(__x); } -__DEVICE__ +__DEVICE_NOCE__ void sincos(double __x, double *__sinptr, double *__cosptr) { +#ifdef __OPENMP_AMDGCN__ + static __attribute__((address_space(5))) double __tmp; + *__sinptr = __ocml_sincos_f64(__x, &__tmp); +#else double __tmp; *__sinptr = __ocml_sincos_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); +#endif *__cosptr = __tmp; } -__DEVICE__ +__DEVICE_NOCE__ void sincospi(double __x, double *__sinptr, double *__cosptr) { +#ifdef __OPENMP_AMDGCN__ + static __attribute__((address_space(5))) double __tmp; + *__sinptr = __ocml_sincospi_f64(__x, &__tmp); +#else double __tmp; *__sinptr = __ocml_sincospi_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); +#endif *__cosptr = __tmp; } @@ -1043,7 +1120,7 @@ __DEVICE__ double y1(double __x) { return __ocml_y1_f64(__x); } -__DEVICE__ +__DEVICE_NOCE__ double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if @@ -1231,6 +1308,7 @@ __DEVICE__ double min(double __x, double __y) { return fmin(__x, __y); } +#ifndef __OPENMP_AMDGCN__ __host__ inline static int min(int __arg1, int __arg2) { return std::min(__arg1, __arg2); } @@ -1239,7 +1317,9 @@ return std::max(__arg1, __arg2); } #endif +#endif // defined(__cplusplus) +#pragma pop_macro("__DEVICE_NOCE__") #pragma pop_macro("__DEVICE__") #pragma pop_macro("__RETURN_TYPE") Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h =================================================================== --- clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h +++ clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h @@ -14,11 +14,15 @@ #error "This file is for OpenMP compilation only." #endif +#ifdef __NVPTX__ #pragma omp begin declare variant match( \ device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) #ifdef __cplusplus +#include extern "C" { +#else +#include #endif #define __CUDA__ @@ -39,4 +43,41 @@ #pragma omp end declare variant +#endif // __NVPTX__ + +#ifdef __AMDGCN__ + +// __NO_INLINE__ prevents some x86 optimized macro definitions in system headers +#define __NO_INLINE__ 1 +#pragma omp begin declare variant match( \ + device = {arch(amdgcn)}, implementation = {extension(match_any)}) + +#ifdef __cplusplus +#include +extern "C" { +#else +#include +#endif + +#define __HIP__ +#define __OPENMP_AMDGCN__ + +#define __host__ __attribute__((host)) +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __shared__ __attribute__((shared)) +#define __constant__ __attribute__((constant)) +#define __private __attribute__((address_space(5))) + +/// Include declarations for libdevice functions. +#include <__clang_hip_libdevice_declares.h> + +#ifdef __cplusplus +} // extern "C" #endif + +#pragma omp end declare variant + +#endif // __AMDGCN__ + +#endif // __CLANG_OPENMP_DEVICE_FUNCTIONS_H__ Index: clang/lib/Headers/openmp_wrappers/cmath =================================================================== --- clang/lib/Headers/openmp_wrappers/cmath +++ clang/lib/Headers/openmp_wrappers/cmath @@ -16,14 +16,16 @@ #include_next -// Make sure we include our math.h overlay, it probably happend already but we -// need to be sure. +// Make sure we include our new and math.h overlays, it probably happened already +// but we need to be sure. +#include #include // We (might) need cstdlib because __clang_cuda_cmath.h below declares `abs` // which might live in cstdlib. #include +#ifdef __NVPTX__ // We need limits because __clang_cuda_cmath.h below uses `std::numeric_limit`. #include @@ -74,5 +76,17 @@ #undef __DEVICE__ #pragma omp end declare variant +#endif // __NVPTX__ -#endif +#ifdef __AMDGCN__ +#pragma omp begin declare variant match( \ + device = {arch(amdgcn)}, implementation = {extension(match_any, allow_templates)}) +#define __HIP__ +#define __OPENMP_AMDGCN__ +#include <__clang_hip_cmath.h> +#undef __HIP__ + +#pragma omp end declare variant +#endif // __AMDGCN__ + +#endif // __CLANG_OPENMP_CMATH_H__ Index: clang/lib/Headers/openmp_wrappers/complex =================================================================== --- clang/lib/Headers/openmp_wrappers/complex +++ clang/lib/Headers/openmp_wrappers/complex @@ -15,11 +15,20 @@ #endif // We require std::math functions in the complex builtins below. +#ifdef __NVPTX__ #include - #define __CUDA__ +#define __ARCHTYPES__ nvptx,nvptx64 +#endif // __NVPTX__ + +#ifdef __AMDGCN__ +#include <__clang_hip_libdevice_declares.h> +#define __HIP__ +#define __ARCHTYPES__ amdgcn +#endif // __AMDGCN__ + #include <__clang_cuda_complex_builtins.h> -#endif +#endif //__CLANG_OPENMP_COMPLEX__ // Grab the host header too. #include_next @@ -36,15 +45,17 @@ // arithmetic and calls to non-complex functions, all of which we can then // handle. #ifndef _LIBCPP_STD_VER +#ifndef _GLIBCXX_COMPLEX #pragma omp begin declare variant match( \ - device = {arch(nvptx, nvptx64)}, \ + device = {arch(__ARCHTYPES__)}, \ implementation = {extension(match_any, allow_templates)}) #include #pragma omp end declare variant -#endif +#endif // _GLIBCXX_COMPLEX +#endif // _LIBCPP_STD_VER -#endif +#endif // __cplusplus Index: clang/lib/Headers/openmp_wrappers/hip/hip_runtime.h =================================================================== --- /dev/null +++ clang/lib/Headers/openmp_wrappers/hip/hip_runtime.h @@ -0,0 +1,28 @@ +/*===-- hip_runtime - OpenMP hip_runtime.h wrapper for target regions ------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_OPENMP_HIP_RUNTIME_H__ +#define __CLANG_OPENMP_HIP_RUNTIME_H__ + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif + +#include +#define __OPENMP_AMDGCN__ +#include_next + +#pragma omp begin declare variant match( \ + device = {arch(amdgcn)}, implementation = {extension(match_any)}) + +#pragma omp end declare variant + +// Now get the actual hip headers + +#endif // __CLANG_OPENMP_HIP_RUNTIME_H__ Index: clang/lib/Headers/openmp_wrappers/math.h =================================================================== --- clang/lib/Headers/openmp_wrappers/math.h +++ clang/lib/Headers/openmp_wrappers/math.h @@ -48,4 +48,18 @@ #pragma omp end declare variant +#pragma omp begin declare variant match( \ + device = {arch(amdgcn)}, implementation = {extension(match_any)}) + +#ifndef __OPENMP_AMDGCN__ +#define __OPENMP_AMDGCN__ +#endif + +#ifndef __HIP__ +#define __HIP__ #endif + +#include <__clang_hip_math.h> +#pragma omp end declare variant + +#endif // __CLANG_OPENMP_MATH_H__ Index: clang/lib/Headers/openmp_wrappers/new =================================================================== --- clang/lib/Headers/openmp_wrappers/new +++ clang/lib/Headers/openmp_wrappers/new @@ -11,7 +11,7 @@ #include_next -#if defined(__NVPTX__) && defined(_OPENMP) +#if (defined(__NVPTX__) || defined(__AMDGCN__)) && defined(_OPENMP) #include Index: clang/test/Headers/Inputs/include/algorithm =================================================================== --- /dev/null +++ clang/test/Headers/Inputs/include/algorithm @@ -0,0 +1,21 @@ +#pragma once + +// Copied from libcxx + +namespace std { + +template + const T& + max(const T& a, const T& b); // constexpr in C++14 +template + const T& + max(const T& a, const T& b, Compare comp); // constexpr in C++14 + +template + const T& + min(const T& a, const T& b); // constexpr in C++14 +template + const T& + min(const T& a, const T& b, Compare comp); // constexpr in C++14 + +} Index: clang/test/Headers/Inputs/include/cstdint =================================================================== --- /dev/null +++ clang/test/Headers/Inputs/include/cstdint @@ -0,0 +1,21 @@ +#pragma once + +#include + +namespace std { +#ifdef __INT32_TYPE__ +using ::uint32_t; +#endif + +#ifdef __INT64_TYPE__ +using ::uint64_t; +#endif + +#ifdef __INTPTR_TYPE__ +using ::intptr_t; +using ::uintptr_t; +#else +#error Every target should have __INTPTR_TYPE__ +#endif + +} // namespace std Index: clang/test/Headers/Inputs/include/cstdlib =================================================================== --- clang/test/Headers/Inputs/include/cstdlib +++ clang/test/Headers/Inputs/include/cstdlib @@ -16,6 +16,7 @@ { using ::abs; +using ::size_t; inline long abs(long __i) { return __builtin_labs(__i); } Index: clang/test/Headers/Inputs/include/exception =================================================================== --- /dev/null +++ clang/test/Headers/Inputs/include/exception @@ -0,0 +1,17 @@ +#pragma once + +// Copied from libcxx + +namespace std { + +class exception +{ +public: + exception() noexcept; + exception(const exception&) noexcept; + exception& operator=(const exception&) noexcept; + virtual ~exception() noexcept; + virtual const char* what() const noexcept; +}; + +} Index: clang/test/Headers/Inputs/include/new =================================================================== --- /dev/null +++ clang/test/Headers/Inputs/include/new @@ -0,0 +1,50 @@ +#pragma once + +#include +#include + +// Copied from libcxx + +namespace std { + +struct nothrow_t { explicit nothrow_t() = default; }; +extern const nothrow_t nothrow; + +class bad_alloc + : public exception +{ +public: + bad_alloc(); + virtual ~bad_alloc(); + virtual const char* what() const; +}; + +class bad_array_new_length + : public bad_alloc +{ +public: + bad_array_new_length(); + virtual ~bad_array_new_length(); + virtual const char* what() const; +}; + +typedef void (*new_handler)(); +new_handler set_new_handler(new_handler); +new_handler get_new_handler(); + +} + +void* operator new(std::size_t __sz); +void* operator new(std::size_t __sz, const std::nothrow_t&); +void operator delete(void* __p); +void operator delete(void* __p, const std::nothrow_t&); + +void* operator new[](std::size_t __sz); +void* operator new[](std::size_t __sz, const std::nothrow_t&); +void operator delete[](void* __p); +void operator delete[](void* __p, const std::nothrow_t&); + +inline void* operator new (std::size_t, void* __p) {return __p;} +inline void* operator new[](std::size_t, void* __p) {return __p;} +inline void operator delete (void*, void*) {} +inline void operator delete[](void*, void*) {} Index: clang/test/Headers/Inputs/include/stdlib.h =================================================================== --- clang/test/Headers/Inputs/include/stdlib.h +++ clang/test/Headers/Inputs/include/stdlib.h @@ -4,3 +4,6 @@ #ifndef __cplusplus extern int abs(int __x) __attribute__((__const__)); #endif + +void free(void* ptr); +void* malloc(size_t size);