diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -60,6 +60,7 @@ __clang_hip_libdevice_declares.h __clang_hip_cmath.h __clang_hip_math.h + __clang_hip_stdlib.h __clang_hip_runtime_wrapper.h ) diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -113,6 +113,7 @@ #include <__clang_hip_libdevice_declares.h> #include <__clang_hip_math.h> +#include <__clang_hip_stdlib.h> #if defined(__HIPCC_RTC__) #include <__clang_hip_cmath.h> diff --git a/clang/lib/Headers/__clang_hip_stdlib.h b/clang/lib/Headers/__clang_hip_stdlib.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/__clang_hip_stdlib.h @@ -0,0 +1,43 @@ +/*===---- __clang_hip_stdlib.h - Device-side HIP math support --------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __CLANG_HIP_STDLIB_H__ + +#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) +#error "This file is for HIP and OpenMP AMDGCN device compilation only." +#endif + +#if !defined(__cplusplus) + +#include + +#ifdef __OPENMP_AMDGCN__ +#define __DEVICE__ static inline __attribute__((always_inline, nothrow)) +#else +#define __DEVICE__ static __device__ inline __attribute__((always_inline)) +#endif + +__DEVICE__ +int abs(int __x) { + int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); + return (__x ^ __sgn) - __sgn; +} +__DEVICE__ +long labs(long __x) { + long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); + return (__x ^ __sgn) - __sgn; +} +__DEVICE__ +long long llabs(long long __x) { + long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); + return (__x ^ __sgn) - __sgn; +} + +#endif // !defined(__cplusplus) + +#endif // #define __CLANG_HIP_STDLIB_H__ diff --git a/clang/lib/Headers/openmp_wrappers/stdlib.h b/clang/lib/Headers/openmp_wrappers/stdlib.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/openmp_wrappers/stdlib.h @@ -0,0 +1,29 @@ +/*===---- openmp_wrapper/stdlib.h ------ OpenMP math.h intercept ----- c++ -=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_OPENMP_STDLIB_H__ +#define __CLANG_OPENMP_STDLIB_H__ + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif + +#include_next + +#ifdef __AMDGCN__ +#pragma omp begin declare variant match(device = {arch(amdgcn)}) + +#define __OPENMP_AMDGCN__ +#include <__clang_hip_stdlib.h> +#undef __OPENMP_AMDGCN__ + +#pragma omp end declare variant +#endif + +#endif // __CLANG_OPENMP_STDLIB_H__ diff --git a/clang/test/Headers/Inputs/include/stdlib.h b/clang/test/Headers/Inputs/include/stdlib.h --- a/clang/test/Headers/Inputs/include/stdlib.h +++ b/clang/test/Headers/Inputs/include/stdlib.h @@ -6,4 +6,6 @@ #ifndef __cplusplus extern int abs(int __x) __attribute__((__const__)); +extern long labs(long __x) __attribute__((__const__)); +extern long long llabs(long long __x) __attribute__((__const__)); #endif diff --git a/clang/test/Headers/amdgcn_openmp_device_math_c.c b/clang/test/Headers/amdgcn_openmp_device_math_c.c new file mode 100644 --- /dev/null +++ b/clang/test/Headers/amdgcn_openmp_device_math_c.c @@ -0,0 +1,131 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -D__OFFLOAD_ARCH_gfx90a__ -emit-llvm-bc %s -o %t-host.bc +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK +// REQUIRES: amdgpu-registered-target + +#include + +void test_math_int(int x) { +#pragma omp target + { + int l1 = abs(x); + } +} + +void test_math_long(long x) { +#pragma omp target + { + long l1 = labs(x); + } +} + +void test_math_long_long(long long x) { +#pragma omp target + { + long long l1 = llabs(x); + } +} +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_math_int_l9 +// CHECK-SAME: (i64 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[__SGN_I:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[L1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr +// CHECK-NEXT: store i64 [[X]], ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 1, i1 true, i1 true) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr +// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr +// CHECK-NEXT: [[__SGN_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__SGN_I]] to ptr +// CHECK-NEXT: store i32 [[TMP1]], ptr [[__X_ADDR_ASCAST_I]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[__X_ADDR_ASCAST_I]], align 4 +// CHECK-NEXT: [[SHR_I:%.*]] = ashr i32 [[TMP2]], 31 +// CHECK-NEXT: store i32 [[SHR_I]], ptr [[__SGN_ASCAST_I]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[__X_ADDR_ASCAST_I]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[__SGN_ASCAST_I]], align 4 +// CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[TMP3]], [[TMP4]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[__SGN_ASCAST_I]], align 4 +// CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i32 [[XOR_I]], [[TMP5]] +// CHECK-NEXT: store i32 [[SUB_I]], ptr [[L1_ASCAST]], align 4 +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true) +// CHECK-NEXT: ret void +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_math_long_l16 +// CHECK-SAME: (i64 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[__SGN_I:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[L1:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr +// CHECK-NEXT: store i64 [[X]], ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true, i1 true) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr +// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr +// CHECK-NEXT: [[__SGN_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__SGN_I]] to ptr +// CHECK-NEXT: store i64 [[TMP1]], ptr [[__X_ADDR_ASCAST_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_ASCAST_I]], align 8 +// CHECK-NEXT: [[SHR_I:%.*]] = ashr i64 [[TMP2]], 63 +// CHECK-NEXT: store i64 [[SHR_I]], ptr [[__SGN_ASCAST_I]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[__X_ADDR_ASCAST_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[__SGN_ASCAST_I]], align 8 +// CHECK-NEXT: [[XOR_I:%.*]] = xor i64 [[TMP3]], [[TMP4]] +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[__SGN_ASCAST_I]], align 8 +// CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i64 [[XOR_I]], [[TMP5]] +// CHECK-NEXT: store i64 [[SUB_I]], ptr [[L1_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true) +// CHECK-NEXT: ret void +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_math_long_long_l23 +// CHECK-SAME: (i64 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[__SGN_I:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[L1:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr +// CHECK-NEXT: store i64 [[X]], ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true, i1 true) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr +// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr +// CHECK-NEXT: [[__SGN_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__SGN_I]] to ptr +// CHECK-NEXT: store i64 [[TMP1]], ptr [[__X_ADDR_ASCAST_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[__X_ADDR_ASCAST_I]], align 8 +// CHECK-NEXT: [[SHR_I:%.*]] = ashr i64 [[TMP2]], 63 +// CHECK-NEXT: store i64 [[SHR_I]], ptr [[__SGN_ASCAST_I]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[__X_ADDR_ASCAST_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[__SGN_ASCAST_I]], align 8 +// CHECK-NEXT: [[XOR_I:%.*]] = xor i64 [[TMP3]], [[TMP4]] +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[__SGN_ASCAST_I]], align 8 +// CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i64 [[XOR_I]], [[TMP5]] +// CHECK-NEXT: store i64 [[SUB_I]], ptr [[L1_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true) +// CHECK-NEXT: ret void +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -85,6 +85,7 @@ "__clang_hip_cmath.h", "__clang_hip_libdevice_declares.h", "__clang_hip_math.h", + "__clang_hip_stdlib.h", "__clang_hip_runtime_wrapper.h", "__stddef_max_align_t.h", "__wmmintrin_aes.h", @@ -192,6 +193,7 @@ "openmp_wrappers/complex.h", "openmp_wrappers/complex_cmath.h", "openmp_wrappers/math.h", + "openmp_wrappers/stdlib.h", "pconfigintrin.h", "pkuintrin.h", "pmmintrin.h",