Index: clang/test/Headers/__clang_hip_libdevice_declares.cpp =================================================================== --- /dev/null +++ clang/test/Headers/__clang_hip_libdevice_declares.cpp @@ -0,0 +1,143 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: amdgpu-registered-target, x86-registered-target + +// fp-contract, -no-enable-noundef-analysis and visibility are to just get the +// same output for openmp and hip. + +// RUN: %clang_cc1 -x c++ -fopenmp -fvisibility=default -ffp-contract=off -no-enable-noundef-analysis \ +// RUN: -internal-isystem %S/../../lib/Headers/openmp_wrappers \ +// RUN: -include __clang_openmp_device_functions.h \ +// RUN: -internal-isystem %S/../../lib/Headers/openmp_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -verify \ +// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm -fopenmp-is-device \ +// RUN: -o - %s | FileCheck --check-prefixes=CHECK,OPENMP,OPENMP-CPP %s + +// RUN: %clang_cc1 -x c -fopenmp -fvisibility=default -ffp-contract=off -no-enable-noundef-analysis \ +// RUN: -internal-isystem %S/../../lib/Headers/openmp_wrappers \ +// RUN: -include __clang_openmp_device_functions.h \ +// RUN: -internal-isystem %S/../../lib/Headers/openmp_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -verify \ +// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm -fopenmp-is-device \ +// RUN: -o - %s | FileCheck --check-prefixes=CHECK,OPENMP,OPENMP-C %s + +// RUN: %clang_cc1 -x hip -fvisibility=default -ffp-contract=off -no-enable-noundef-analysis \ +// RUN: -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -verify \ +// RUN: -emit-llvm -fcuda-is-device -o - \ +// RUN: -D__HIPCC_RTC__ %s | FileCheck -check-prefixes=CHECK,HIP %s + +// expected-no-diagnostics + +#ifdef __cplusplus +#include +#else +#include +#endif + +#ifdef _OPENMP +#define __device__ +#endif + +// static and overloadable to get the same function annotations between C, C++ and HIP. +#define TEST_FUNC_ATTRS static __device__ __attribute__((used,overloadable)) + +#ifdef _OPENMP +#pragma omp begin declare target +#endif + +// This function is a hack to get the same IR out of HIP and OpenMP. The HIP +// headers declare __cxa_* functions with these attributes, such that the +// attribute groups are different. update_cc_test_checks isn't smart enough to +// strip attributes from the checks, or semantically compare them. +__attribute__((__visibility__("default"))) +__attribute__((weak)) +__attribute__((noreturn)) +__attribute__((overloadable)) +// OPENMP-LABEL: define weak hidden void @_Z20attribute_check_hackv +// OPENMP-SAME: () #[[ATTR0:[0-9]+]] { +// OPENMP-NEXT: entry: +// OPENMP-NEXT: call void @llvm.trap() +// OPENMP-NEXT: unreachable +// +void attribute_check_hack(void) { + __builtin_trap(); +} + +// CHECK-LABEL: define internal float @_ZL18test_ockl_acos_f32f +// CHECK-SAME: (float [[SRC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr +// CHECK-NEXT: store float [[SRC]], ptr [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call float @__ocml_acos_f32(float [[TMP0]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: ret float [[CALL]] +// +TEST_FUNC_ATTRS float test_ockl_acos_f32(float src) { + return __ocml_acos_f32(src); +} + +// CHECK-LABEL: define internal float @_ZL15test_ockl_fdot2Dv2_DF16_S_fbi +// CHECK-SAME: (<2 x half> [[A:%.*]], <2 x half> [[B:%.*]], float [[C:%.*]], i1 zeroext [[S:%.*]], i32 [[S_INT:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5) +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5) +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca i8, align 1, addrspace(5) +// CHECK-NEXT: [[S_INT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[Y:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr +// CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr +// CHECK-NEXT: [[S_INT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_INT_ADDR]] to ptr +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[Y_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y]] to ptr +// CHECK-NEXT: store <2 x half> [[A]], ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <2 x half> [[B]], ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[C]], ptr [[C_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[S]] to i8 +// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[S_ADDR_ASCAST]], align 1 +// CHECK-NEXT: store i32 [[S_INT]], ptr [[S_INT_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[S_ADDR_ASCAST]], align 1 +// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 +// CHECK-NEXT: [[CALL:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP0]], <2 x half> [[TMP1]], float [[TMP2]], i1 zeroext [[TOBOOL]]) #[[ATTR4]] +// CHECK-NEXT: store float [[CALL]], ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load <2 x half>, ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[C_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[S_INT_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK-NEXT: [[CALL2:%.*]] = call float @__ockl_fdot2(<2 x half> [[TMP4]], <2 x half> [[TMP5]], float [[TMP6]], i1 zeroext [[TOBOOL1]]) #[[ATTR4]] +// CHECK-NEXT: store float [[CALL2]], ptr [[Y_ASCAST]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[Y_ASCAST]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP8]], [[TMP9]] +// CHECK-NEXT: ret float [[ADD]] +// +TEST_FUNC_ATTRS float test_ockl_fdot2(__2f16 a, __2f16 b, float c, bool s, int s_int) { + float x = __ockl_fdot2(a, b, c, s); + float y = __ockl_fdot2(a, b, c, s_int); + return x + y; +} + + +#ifdef _OPENMP +#pragma omp end declare target +#endif +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// HIP: {{.*}} +// OPENMP-C: {{.*}} +// OPENMP-CPP: {{.*}}