diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -81,7 +81,15 @@
     libc.src.stdio.stderr
 )
 
+set(TARGET_LIBM_ENTRYPOINTS
+    # math.h entrypoints
+    libc.src.math.sin
+    libc.src.math.round
+    libc.src.math.roundf
+    libc.src.math.roundl
+)
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
+  ${TARGET_LIBM_ENTRYPOINTS}
 )
-
diff --git a/libc/config/gpu/headers.txt b/libc/config/gpu/headers.txt
--- a/libc/config/gpu/headers.txt
+++ b/libc/config/gpu/headers.txt
@@ -1,6 +1,7 @@
 set(TARGET_PUBLIC_HEADERS
     libc.include.ctype
     libc.include.string
+    libc.include.math
     libc.include.fenv
     libc.include.errno
     libc.include.stdlib
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -18,6 +18,19 @@
     return()
   endif()
 
+  # The GPU optionally depends on vendor libraries. If we emitted one of these
+  # entrypoints it means the user requested it and we should use it instead.
+  get_fq_target_name("${LIBC_TARGET_ARCHITECTURE}.vendor.${name}" fq_vendor_specific_target_name)
+  if(TARGET ${fq_vendor_specific_target_name})
+    add_entrypoint_object(
+      ${name}
+      ALIAS
+      DEPENDS
+        .${LIBC_TARGET_ARCHITECTURE}.vendor.${name}
+    )
+    return()
+  endif()
+
   get_fq_target_name("generic.${name}" fq_generic_target_name)
   if(TARGET ${fq_generic_target_name})
     add_entrypoint_object(
diff --git a/libc/src/math/gpu/CMakeLists.txt b/libc/src/math/gpu/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/CMakeLists.txt
@@ -0,0 +1,34 @@
+# Math functions not yet available in the libc project, or those not yet tuned
+# for GPU workloads are provided as wrappers over vendor libraries. If we find
+# them ahead of time we will import them statically. Otherwise, we will keep
+# them as external references and expect them to be resolved by the user when
+# they compile.
In the future, we will use implementations from the 'libc'
+# project and not provide these wrappers.
+add_subdirectory(vendor)
+
+# For the GPU we want to be able to optionally depend on the vendor libraries
+# until we have a suitable replacement inside `libc`.
+# TODO: We should have an option to enable or disable these on a per-function
+#       basis.
+option(LIBC_GPU_VENDOR_MATH "Use vendor wrappers for GPU math" ON)
+function(add_math_entrypoint_gpu_object name)
+  get_fq_target_name("vendor.${name}" fq_vendor_specific_target_name)
+  if(TARGET ${fq_vendor_specific_target_name} AND LIBC_GPU_VENDOR_MATH)
+    return()
+  endif()
+
+  add_entrypoint_object(
+    ${name}
+    ${ARGN}
+  )
+endfunction()
+
+add_math_entrypoint_gpu_object(
+  round
+  SRCS
+    round.cpp
+  HDRS
+    ../round.h
+  COMPILE_OPTIONS
+    -O2
+)
diff --git a/libc/src/math/gpu/round.cpp b/libc/src/math/gpu/round.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/round.cpp
@@ -0,0 +1,16 @@
+//===-- Implementation of the GPU round function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/round.h"
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(double, round, (double x)) { return __builtin_round(x); }
+
+} // namespace __llvm_libc
diff --git a/libc/src/math/gpu/roundf.cpp b/libc/src/math/gpu/roundf.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/roundf.cpp
@@ -0,0 +1,16 @@
+//===-- Implementation of the GPU roundf function -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/roundf.h"
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(float, roundf, (float x)) { return __builtin_roundf(x); }
+
+} // namespace __llvm_libc
diff --git a/libc/src/math/gpu/roundl.cpp b/libc/src/math/gpu/roundl.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/roundl.cpp
@@ -0,0 +1,23 @@
+//===-- Implementation of the GPU roundl function -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/roundl.h"
+#include "src/__support/FPUtil/PlatformDefs.h"
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+#ifndef LONG_DOUBLE_IS_DOUBLE
+#error "GPU targets do not support long doubles"
+#endif
+
+LLVM_LIBC_FUNCTION(long double, roundl, (long double x)) {
+  return __builtin_round(x);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/math/gpu/vendor/CMakeLists.txt b/libc/src/math/gpu/vendor/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/vendor/CMakeLists.txt
@@ -0,0 +1,41 @@
+find_package(AMDDeviceLibs QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if(AMDDeviceLibs_FOUND)
+  message(STATUS "Found the ROCm device library. Implementations falling back "
+                 "to the vendor libraries will be resolved statically.")
+  get_target_property(ocml_path ocml IMPORTED_LOCATION)
+  list(APPEND bitcode_link_flags
+       "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${ocml_path}")
+else()
+  message(STATUS "Could not find the ROCm device library. Unimplemented "
+                 "functions will be an external reference to the vendor libraries.")
+endif()
+
+find_package(CUDAToolkit QUIET)
+if(CUDAToolkit_FOUND)
+  set(libdevice_path ${CUDAToolkit_BIN_DIR}/../nvvm/libdevice/libdevice.10.bc)
+  if (EXISTS ${libdevice_path})
+    message(STATUS "Found the CUDA device library. Implementations falling back "
+                   "to the vendor libraries will be resolved statically.")
+    list(APPEND bitcode_link_flags
+         "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${libdevice_path}")
+  endif()
+else()
+  message(STATUS "Could not find the CUDA device library. Unimplemented "
+                 "functions will be an external reference to the vendor libraries.")
+endif()
+
+# FIXME: We need a way to pass the library to only the NVPTX / AMDGPU build.
+# This shouldn't cause issues because we only link in needed symbols, but it
+# will link in identity metadata from both libraries. This silences the warning.
+list(APPEND bitcode_link_flags "-Wno-linker-warnings")
+
+add_entrypoint_object(
+  sin
+  SRCS
+    sin.cpp
+  HDRS
+    ../../sin.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
diff --git a/libc/src/math/gpu/vendor/amdgpu/amdgpu.h b/libc/src/math/gpu/vendor/amdgpu/amdgpu.h
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/vendor/amdgpu/amdgpu.h
@@ -0,0 +1,25 @@
+//===-- AMDGPU specific definitions for math support ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H
+#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H
+
+#include "declarations.h"
+#include "platform.h"
+
+#include "src/__support/macros/attributes.h"
+
+namespace __llvm_libc {
+namespace internal {
+
+LIBC_INLINE double sin(double x) { return __ocml_sin_f64(x); }
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H
diff --git a/libc/src/math/gpu/vendor/amdgpu/declarations.h b/libc/src/math/gpu/vendor/amdgpu/declarations.h
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/vendor/amdgpu/declarations.h
@@ -0,0 +1,20 @@
+//===-- AMDGPU specific declarations for math support ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H
+#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H
+
+namespace __llvm_libc {
+
+extern "C" {
+double __ocml_sin_f64(double);
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H
diff --git a/libc/src/math/gpu/vendor/amdgpu/platform.h b/libc/src/math/gpu/vendor/amdgpu/platform.h
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/vendor/amdgpu/platform.h
@@ -0,0 +1,110 @@
+//===-- AMDGPU specific platform definitions for math support -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
+#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
+
+#include <stdint.h>
+
+namespace __llvm_libc {
+
+// The ROCm device library uses control globals to alter codegen for the
+// different targets. To avoid needing to link them in manually we simply
+// define them here.
+extern "C" {
+
+// Disable unsafe math optimizations in the implementation.
+extern const uint8_t __oclc_unsafe_math_opt = 0;
+
+// Disable denormalization at zero optimizations in the implementation.
+extern const uint8_t __oclc_daz_opt = 0;
+
+// Disable rounding optimizations for 32-bit square roots.
+extern const uint8_t __oclc_correctly_rounded_sqrt32 = 0;
+
+// Disable finite math optimizations.
+extern const uint8_t __oclc_finite_only_opt = 0;
+
+#if defined(__gfx700__)
+extern const uint32_t __oclc_ISA_version = 7000;
+#elif defined(__gfx701__)
+extern const uint32_t __oclc_ISA_version = 7001;
+#elif defined(__gfx702__)
+extern const uint32_t __oclc_ISA_version = 7002;
+#elif defined(__gfx703__)
+extern const uint32_t __oclc_ISA_version = 7003;
+#elif defined(__gfx704__)
+extern const uint32_t __oclc_ISA_version = 7004;
+#elif defined(__gfx705__)
+extern const uint32_t __oclc_ISA_version = 7005;
+#elif defined(__gfx801__)
+extern const uint32_t __oclc_ISA_version = 8001;
+#elif defined(__gfx802__)
+extern const uint32_t __oclc_ISA_version = 8002;
+#elif defined(__gfx803__)
+extern const uint32_t __oclc_ISA_version = 8003;
+#elif defined(__gfx805__)
+extern const uint32_t __oclc_ISA_version = 8005;
+#elif defined(__gfx810__)
+extern const uint32_t __oclc_ISA_version = 8100;
+#elif defined(__gfx900__)
+extern const uint32_t __oclc_ISA_version = 9000;
+#elif defined(__gfx902__)
+extern const uint32_t __oclc_ISA_version = 9002;
+#elif defined(__gfx904__)
+extern const uint32_t __oclc_ISA_version = 9004;
+#elif defined(__gfx906__)
+extern const uint32_t __oclc_ISA_version = 9006;
+#elif defined(__gfx908__)
+extern const uint32_t __oclc_ISA_version = 9008;
+#elif defined(__gfx909__)
+extern const uint32_t __oclc_ISA_version = 9009;
+#elif defined(__gfx90a__)
+extern const uint32_t __oclc_ISA_version = 9010;
+#elif defined(__gfx90c__)
+extern const uint32_t __oclc_ISA_version = 9012;
+#elif defined(__gfx940__)
+extern const uint32_t __oclc_ISA_version = 9400;
+#elif defined(__gfx1010__)
+extern const uint32_t __oclc_ISA_version = 10100;
+#elif defined(__gfx1011__)
+extern const uint32_t __oclc_ISA_version = 10101;
+#elif defined(__gfx1012__)
+extern const uint32_t __oclc_ISA_version = 10102;
+#elif defined(__gfx1013__)
+extern const uint32_t __oclc_ISA_version = 10103;
+#elif defined(__gfx1030__)
+extern const uint32_t __oclc_ISA_version = 10300;
+#elif defined(__gfx1031__)
+extern const uint32_t __oclc_ISA_version = 10301;
+#elif defined(__gfx1032__)
+extern const uint32_t __oclc_ISA_version = 10302;
+#elif defined(__gfx1033__)
+extern const uint32_t __oclc_ISA_version = 10303;
+#elif defined(__gfx1034__)
+extern const uint32_t __oclc_ISA_version = 10304;
+#elif defined(__gfx1035__)
+extern const uint32_t __oclc_ISA_version = 10305;
+#elif defined(__gfx1036__)
+extern const uint32_t __oclc_ISA_version = 10306;
+#elif defined(__gfx1100__)
+extern const uint32_t __oclc_ISA_version = 11000;
+#elif defined(__gfx1101__)
+extern const uint32_t __oclc_ISA_version = 11001;
+#elif defined(__gfx1102__)
+extern const uint32_t __oclc_ISA_version = 11002;
+#elif defined(__gfx1103__)
+extern const uint32_t __oclc_ISA_version = 11003;
+#else
+#error "Unknown AMDGPU architecture"
+#endif
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
diff --git a/libc/src/math/gpu/vendor/common.h b/libc/src/math/gpu/vendor/common.h
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/vendor/common.h
@@ -0,0 +1,22 @@
+//===-- Common interface for
compiling the GPU math -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_COMMON_H
+#define LLVM_LIBC_SRC_MATH_GPU_COMMON_H
+
+#include "src/__support/macros/properties/architectures.h"
+
+#if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
+#include "amdgpu/amdgpu.h"
+#elif defined(LIBC_TARGET_ARCH_IS_NVPTX)
+#include "nvptx/nvptx.h"
+#else
+#error "Unsupported platform"
+#endif
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_COMMON_H
diff --git a/libc/src/math/gpu/vendor/nvptx/declarations.h b/libc/src/math/gpu/vendor/nvptx/declarations.h
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/vendor/nvptx/declarations.h
@@ -0,0 +1,20 @@
+//===-- NVPTX specific declarations for math support ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H
+#define LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H
+
+namespace __llvm_libc {
+
+extern "C" {
+double __nv_sin(double);
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H
diff --git a/libc/src/math/gpu/vendor/nvptx/nvptx.h b/libc/src/math/gpu/vendor/nvptx/nvptx.h
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/vendor/nvptx/nvptx.h
@@ -0,0 +1,24 @@
+//===-- NVPTX specific definitions for math support -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_NVPTX_H
+#define LLVM_LIBC_SRC_MATH_GPU_NVPTX_H
+
+#include "declarations.h"
+
+#include "src/__support/macros/attributes.h"
+
+namespace __llvm_libc {
+namespace internal {
+
+LIBC_INLINE double sin(double x) { return __nv_sin(x); }
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_NVPTX_H
diff --git a/libc/src/math/gpu/vendor/sin.cpp b/libc/src/math/gpu/vendor/sin.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/math/gpu/vendor/sin.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the sin function for GPU ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/sin.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(double, sin, (double x)) { return internal::sin(x); }
+
+} // namespace __llvm_libc