diff --git a/clang/include/clang/Basic/OpenMPGridValues.h b/clang/include/clang/Basic/OpenMPGridValues.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Basic/OpenMPGridValues.h @@ -0,0 +1,134 @@ +//===--- OpenMPGridValues.h - Language-specific address spaces --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Provides definitions for Target specific Grid Values +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_OPENMPGRIDVALUES_H +#define LLVM_CLANG_BASIC_OPENMPGRIDVALUES_H + +namespace clang { + +namespace GPU { + +/// \brief Defines various target-specific Gpu grid values that must be +/// consistent between host RTL (plugin), device RTL, and clang. +/// By adding this to TargetInfo in clang, we can change grid values +/// for a "fat" binary so that different passes get the correct values +/// when generating code for a multi-target binary. Both amdgcn +/// and nvptx values are stored in this file. In the future, should +/// there be differences between GPUs of the same architecture, +/// then simply make a different array and use the new array name. +/// +/// Example usage in clang: +/// const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size); +/// +/// Example usage in libomptarget/deviceRTLs: +/// #include "OpenMPGridValues.h" +/// #ifdef __AMDGPU__ +/// #define GRIDVAL AMDGPUGpuGridValues +/// #else +/// #define GRIDVAL NVPTXGpuGridValues +/// #endif +/// ... Then use this reference for GV_Warp_Size in the deviceRTL source. +/// GRIDVAL[GV_Warp_Size] +/// +/// Example usage in libomptarget hsa plugin: +/// #include "OpenMPGridValues.h" +/// #define GRIDVAL AMDGPUGpuGridValues +/// ... Then use this reference to access GV_Warp_Size in the hsa plugin. +/// GRIDVAL[GV_Warp_Size] +/// +/// Example usage in libomptarget cuda plugin: +/// #include "OpenMPGridValues.h" +/// #define GRIDVAL NVPTXGpuGridValues +/// ... Then use this reference to access GV_Warp_Size in the cuda plugin. +/// GRIDVAL[GV_Warp_Size] +/// +enum GVIDX { + /// The maximum number of workers in a kernel. + /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z + GV_Threads, + /// The size reserved for data in a shared memory slot. + GV_Slot_Size, + /// The maximum number of threads in a worker warp. + GV_Warp_Size, + /// The number of bits required to represent the max number of threads in warp + GV_Warp_Size_Log2, + /// GV_Warp_Size * GV_Slot_Size, + GV_Warp_Slot_Size, + /// the maximum number of teams. + GV_Max_Teams, + /// Global Memory Alignment + GV_Mem_Align, + /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) + GV_Warp_Size_Log2_Mask, + // An alternative to the heavy data sharing infrastructure that uses global + // memory is one that uses device __shared__ memory. The amount of such space + // (in bytes) reserved by the OpenMP runtime is noted here. + GV_SimpleBufferSize, + // The absolute maximum team size for a working group + GV_Max_WG_Size, + // The default maximum team size for a working group + GV_Default_WG_Size, + // This is GV_Max_WG_Size / GV_WarpSize. 32 for Nvidia and 16 for AMD. + GV_Max_Warp_Number +}; + +enum GVLIDX { + /// The slot size that should be reserved for a working warp. + /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) + GV_Warp_Size_Log2_MaskL +}; + +/// For AMDGPU GPUs +static constexpr int AMDGPUGpuGridValues[] = { + 448, // GV_Threads FIXME: How can we make this bigger? + 256, // GV_Slot_Size + 64, // GV_Warp_Size + 6, // GV_Warp_Size_Log2 + 64 * 256, // GV_Warp_Slot_Size + 128, // GV_Max_Teams + 256, // GV_Mem_Align + 63, // GV_Warp_Size_Log2_Mask + 896, // GV_SimpleBufferSize + 1024, // GV_Max_WG_Size, + 256, // GV_Defaut_WG_Size + 1024 / 64 // This is GV_Max_WG_Size / GV_WarpSize + +}; +static constexpr long long AMDGPUGpuLongGridValues[] = { + 63 // GV_Warp_Size_Log2_MaskL +}; + +/// For Nvidia GPUs +static constexpr int NVPTXGpuGridValues[] = { + 992, // GV_Threads + 256, // GV_Slot_Size + 32, // GV_Warp_Size + 5, // GV_Warp_Size_Log2 + 32 * 256, // GV_Warp_Slot_Size + 1024, // GV_Max_Teams + 256, // GV_Mem_Align + (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask + 896, // GV_SimpleBufferSize + 1024, // GV_Max_WG_Size + 128, // GV_Defaut_WG_Size + 1024 / 32 // GV_Max_WG_Size / GV_WarpSize +}; + +static constexpr long long NVPTXGpuLongGridValues[] = { + 31 // GV_Warp_Size_Log2_MaskL +}; + +} // namespace GPU +} // namespace clang +#endif diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -15,8 +15,10 @@ #define LLVM_CLANG_BASIC_TARGETINFO_H #include "clang/Basic/AddressSpaces.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/LangOptions.h" +#include "clang/Basic/OpenMPGridValues.h" #include "clang/Basic/Specifiers.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetOptions.h" @@ -196,6 +198,8 @@ unsigned char RegParmMax, SSERegParmMax; TargetCXXABI TheCXXABI; const LangASMap *AddrSpaceMap; + const int *GridValues; + const long long int *LongGridValues; mutable StringRef PlatformName; mutable VersionTuple PlatformMinVersion; @@ -1306,6 +1310,12 @@ return LangAS::Default; } + int getGridValue(GPU::GVIDX gv) const { return GridValues[gv]; } + + long long getLongGridValue(GPU::GVLIDX gv) const { + return LongGridValues[gv]; + } + /// Retrieve the name of the platform as it is used in the /// availability attribute. StringRef getPlatformName() const { return PlatformName; } diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -15,6 +15,7 @@ #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/MacroBuilder.h" +#include "clang/Basic/OpenMPGridValues.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/DataLayout.h" @@ -286,6 +287,8 @@ resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN : DataLayoutStringR600); assert(DataLayout->getAllocaAddrSpace() == Private); + GridValues = (const int *)&(GPU::AMDGPUGpuGridValues[0]); + LongGridValues = (const long long *)&(GPU::AMDGPUGpuLongGridValues[0]); setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || !isAMDGCN(Triple)); diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -14,6 +14,7 @@ #include "Targets.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/MacroBuilder.h" +#include "clang/Basic/OpenMPGridValues.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/ADT/StringSwitch.h" @@ -62,6 +63,8 @@ TLSSupported = false; VLASupported = false; AddrSpaceMap = &NVPTXAddrSpaceMap; + GridValues = (const int *)&(GPU::NVPTXGpuGridValues[0]); + LongGridValues = (const long long *)&(GPU::NVPTXGpuLongGridValues[0]); UseAddrSpaceMapMangling = true; // Define available target features