Changeset View
Standalone View
clang/lib/Headers/offload_macros.h
- This file was added.
//===--- offload_macros.h - Universal _DEVICE Offloading Macros Header ---===// | |||||
// | |||||
jdoerfert: After @MaskRay noticed this, I think this should be `__offload_macros.h` to make it clear this… | |||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||||
// See https://llvm.org/LICENSE.txt for license information. | |||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||||
// | |||||
//===----------------------------------------------------------------------===//
// | |||||
// This header creates macros _DEVICE_ARCH and _DEVICE_GPU with values. This
// header exists because compiler macros are inconsistent in specifying if a
// compilation is a device pass or a host pass. There is also inconsistency in
// how the device architecture and type are specified during a device pass. The
// inconsistencies are between OpenMP, CUDA, HIP, SYCL, and OpenCL. The macro
// logic in this header is aware of these inconsistencies and sets useful values
// for _DEVICE_ARCH and _DEVICE_GPU during a device compilation. The macros will
// not be defined during a host compilation pass. So "#ifndef _DEVICE_ARCH" can
// be used by users to detect a host compilation. This header must remain a
// preprocessing header only because it is intended to be used by different
// languages.
// | |||||
//===----------------------------------------------------------------------===//
Not Done ReplyInline ActionsNit duplicate jdoerfert: Nit duplicate | |||||
#ifndef __OFFLOAD_MACROS_H__ | |||||
Lint: Pre-merge checks clang-tidy: warning: header guard does not follow preferred style [llvm-header-guard] Lint: Pre-merge checks: clang-tidy: warning: header guard does not follow preferred style [llvm-header-guard]
[[https… | |||||
// Include-guard definition; the matching #ifndef / #endif bracket the header.
#define __OFFLOAD_MACROS_H__ | |||||
// Drop any earlier definitions so that _DEVICE_GPU and _DEVICE_ARCH exist
// only when the logic below identifies a device compilation pass.
#undef _DEVICE_GPU | |||||
#undef _DEVICE_ARCH | |||||
Not Done ReplyInline ActionsThe naming seems to conflict with the current notation that GPU arch is the specific GPU variant. E.g. gfx900 or sm_60. tra: The naming seems to conflict with the current notation that GPU `arch` is the specific GPU… | |||||
// Detect a device compilation pass for each offloading model and record the
// target in _DEVICE_ARCH (and, for NVPTX, the GPU in _DEVICE_GPU). Nothing is
// defined when no branch matches, i.e. on a host pass.
//
// NOTE(review): _DEVICE_ARCH expands to a bare identifier (amdgcn / nvptx64).
// Inside #if such identifiers evaluate to 0, so the two values cannot be told
// apart with a test like `#if _DEVICE_ARCH == amdgcn`; numeric values would be
// needed for that (raised in review by tra).
// NOTE(review): nvptx64 is recorded for every NVPTX target; a 32-bit nvptx
// target is not distinguished (review nit by tra).
#if defined(_OPENMP)
// OpenMP does not set architecture macros on host pass.
// So if either is set, this is an OpenMP device pass.
#if defined(__AMDGCN__)
#define _DEVICE_ARCH amdgcn
// _DEVICE_GPU is derived from the __gfxNNN__ macros later in this header.
#elif defined(__NVPTX__)
#define _DEVICE_ARCH nvptx64
#define _DEVICE_GPU __CUDA_ARCH__
#endif // __AMDGCN__ / __NVPTX__
#elif defined(__CUDA_ARCH__)
// CUDA sets macros __NVPTX__ on host pass. So use __CUDA_ARCH__
// to determine if this is device pass.
#define _DEVICE_ARCH nvptx64
#define _DEVICE_GPU __CUDA_ARCH__
#elif defined(__HIP_DEVICE_COMPILE__)
// HIP sets macros __AMDGCN__ on host pass. So use __HIP_DEVICE_COMPILE__
// to determine if this is device pass.
#define _DEVICE_ARCH amdgcn
// _DEVICE_GPU is derived from the __gfxNNN__ macros later in this header.
#elif defined(__SYCL_DEVICE_ONLY__)
// A SYCL device pass may target something other than AMDGCN or NVPTX (for
// example SPIR), so NVPTX is tested explicitly rather than assumed for every
// non-AMD device. Other targets leave both macros undefined.
#if defined(__AMDGCN__)
#define _DEVICE_ARCH amdgcn
// _DEVICE_GPU is derived from the __gfxNNN__ macros later in this header.
#elif defined(__NVPTX__)
#define _DEVICE_ARCH nvptx64
#define _DEVICE_GPU __CUDA_ARCH__
#endif // __AMDGCN__ / __NVPTX__
#elif defined(__OPENCL_C_VERSION__) || defined(__OPENCL_CPP_VERSION__)
// The two targets are alternatives; a single chain guarantees _DEVICE_ARCH is
// defined at most once.
#if defined(__AMDGCN__)
#define _DEVICE_ARCH amdgcn
// _DEVICE_GPU is derived from the __gfxNNN__ macros later in this header.
#elif defined(__NVPTX__)
#define _DEVICE_ARCH nvptx64
#define _DEVICE_GPU __CUDA_ARCH__
#endif // __AMDGCN__ / __NVPTX__
#endif // _OPENMP / __CUDA_ARCH__ / __HIP_DEVICE_COMPILE__ / SYCL / OpenCL
Not Done ReplyInline ActionsI guess the pattern #if defined(__AMDGCN__) #define _DEVICE_ARCH amdgcn // _DEVICE_GPU set below #endif #if defined(__NVPTX__) #define _DEVICE_ARCH nvptx64 #define _DEVICE_GPU __CUDA_ARCH__ #endif is repeating here but it might make sense to lists all the cases one by one instead of a single conditional, e.g., ifdef OPENMP || SYCL || OPENCL || ... jdoerfert: I guess the pattern
```
#if defined(__AMDGCN__)
#define _DEVICE_ARCH amdgcn
// _DEVICE_GPU set… | |||||
// AMD identifies the GPU only through boolean per-target macros (__gfxNNN__),
// so build a numeric _DEVICE_GPU from them: gfxNNN maps to NNN * 10.
//
// The guard keys on _DEVICE_GPU still being undefined: every NVPTX path in
// this header defines it from __CUDA_ARCH__, every AMD path leaves it unset.
// A token comparison such as `_DEVICE_ARCH == amdgcn` cannot be used here,
// because the preprocessor replaces identifiers that are not macros with 0,
// making the comparison true for nvptx64 as well (raised in review by tra,
// https://godbolt.org/z/Kn3r4x).
#if defined(_DEVICE_ARCH) && !defined(_DEVICE_GPU)
// Ordered by target number. At most one __gfxNNN__ macro is expected per
// compilation (NOTE(review): confirm), so the order is for readability only.
#if defined(__gfx601__)
#define _DEVICE_GPU 6010
#elif defined(__gfx700__)
#define _DEVICE_GPU 7000
#elif defined(__gfx701__)
#define _DEVICE_GPU 7010
#elif defined(__gfx702__)
#define _DEVICE_GPU 7020
#elif defined(__gfx703__)
#define _DEVICE_GPU 7030
#elif defined(__gfx801__)
#define _DEVICE_GPU 8010
#elif defined(__gfx802__)
#define _DEVICE_GPU 8020
#elif defined(__gfx803__)
#define _DEVICE_GPU 8030
#elif defined(__gfx810__)
#define _DEVICE_GPU 8100
#elif defined(__gfx900__)
#define _DEVICE_GPU 9000
#elif defined(__gfx902__)
#define _DEVICE_GPU 9020
#elif defined(__gfx904__)
#define _DEVICE_GPU 9040
#elif defined(__gfx906__)
#define _DEVICE_GPU 9060
#elif defined(__gfx909__)
#define _DEVICE_GPU 9090
#elif defined(__gfx1010__)
#define _DEVICE_GPU 10100
#elif defined(__gfx1011__)
#define _DEVICE_GPU 10110
#elif defined(__gfx1012__)
#define _DEVICE_GPU 10120
#elif defined(__gfx1030__)
#define _DEVICE_GPU 10300
#else
// No known target macro is set. UNKNOWN is not itself a macro, so it
// evaluates to 0 when _DEVICE_GPU is used inside #if.
#define _DEVICE_GPU UNKNOWN
#endif // __gfxNNN__
#endif // _DEVICE_ARCH && !_DEVICE_GPU
#endif // __OFFLOAD_MACROS_H__ | |||||
No newline at end of file |
After @MaskRay noticed this, I think this should be __offload_macros.h to make it clear this is an internal header.