diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -210,9 +210,6 @@ unsigned char RegParmMax, SSERegParmMax; TargetCXXABI TheCXXABI; const LangASMap *AddrSpaceMap; - const llvm::omp::GV *GridValues = - nullptr; // target-specific GPU grid values that must be - // consistent between host RTL (plugin), device RTL, and clang. mutable StringRef PlatformName; mutable VersionTuple PlatformMinVersion; @@ -1410,10 +1407,10 @@ return LangAS::Default; } - /// Return a target-specific GPU grid values - const llvm::omp::GV &getGridValue() const { - assert(GridValues != nullptr && "GridValues not initialized"); - return *GridValues; + // access target-specific GPU grid values that must be consistent between + // host RTL (plugin), deviceRTL and clang. + virtual const llvm::omp::GV &getGridValue() const { + llvm_unreachable("getGridValue not implemented on this target"); } /// Retrieve the name of the platform as it is used in the diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -370,6 +370,10 @@ return getLangASFromTargetAS(Constant); } + const llvm::omp::GV &getGridValue() const override { + return llvm::omp::AMDGPUGridValues; + } + /// \returns Target specific vtbl ptr address space. 
unsigned getVtblPtrAddressSpace() const override { return static_cast<unsigned>(Constant); diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -17,7 +17,6 @@ #include "clang/Basic/MacroBuilder.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Frontend/OpenMP/OMPGridValues.h" using namespace clang; using namespace clang::targets; @@ -335,7 +334,6 @@ llvm::AMDGPU::getArchAttrR600(GPUKind)) { resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN : DataLayoutStringR600); - GridValues = &llvm::omp::AMDGPUGridValues; setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || !isAMDGCN(Triple)); diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -147,6 +147,10 @@ Opts["cl_khr_local_int32_extended_atomics"] = true; } + const llvm::omp::GV &getGridValue() const override { + return llvm::omp::NVPTXGridValues; + } + /// \returns If a target requires an address within a target specific address /// space \p AddressSpace to be converted in order to be used, then return the /// corresponding target specific DWARF address space. 
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -16,7 +16,6 @@ #include "clang/Basic/MacroBuilder.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Frontend/OpenMP/OMPGridValues.h" using namespace clang; using namespace clang::targets; @@ -65,7 +64,6 @@ TLSSupported = false; VLASupported = false; AddrSpaceMap = &NVPTXAddrSpaceMap; - GridValues = &llvm::omp::NVPTXGridValues; UseAddrSpaceMapMangling = true; // Define available target features diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" #include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/Support/MathExtras.h" using namespace clang; using namespace CodeGen; @@ -106,8 +107,7 @@ /// is the same for all known NVPTX architectures. enum MachineConfiguration : unsigned { /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target - /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2, - /// and GV_Warp_Size_Log2_Mask. + /// specific Grid Values like GV_Warp_Size, GV_Slot_Size /// Global memory alignment for performance. GlobalMemoryAlignment = 128, @@ -535,7 +535,8 @@ /// on the NVPTX device, to generate more efficient code. static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2; + unsigned LaneIDBits = + llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size); auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id"); } @@ -545,8 +546,9 @@ /// on the NVPTX device, to generate more efficient code. 
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - unsigned LaneIDMask = - CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask; + unsigned LaneIDBits = + llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size); + unsigned LaneIDMask = ~0u >> (32u - LaneIDBits); auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask), "nvptx_lane_id"); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h @@ -62,19 +62,13 @@ const unsigned GV_Slot_Size; /// The default value of maximum number of threads in a worker warp. const unsigned GV_Warp_Size; - /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size - /// for NVPTX. - const unsigned GV_Warp_Size_32; - /// The number of bits required to represent the max number of threads in warp - const unsigned GV_Warp_Size_Log2; - /// GV_Warp_Size * GV_Slot_Size, - const unsigned GV_Warp_Slot_Size; + + constexpr unsigned warpSlotSize() const { + return GV_Warp_Size * GV_Slot_Size; + } + /// the maximum number of teams. const unsigned GV_Max_Teams; - /// Global Memory Alignment - const unsigned GV_Mem_Align; - /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) - const unsigned GV_Warp_Size_Log2_Mask; // An alternative to the heavy data sharing infrastructure that uses global // memory is one that uses device __shared__ memory. The amount of such space // (in bytes) reserved by the OpenMP runtime is noted here. @@ -83,47 +77,32 @@ const unsigned GV_Max_WG_Size; // The default maximum team size for a working group const unsigned GV_Default_WG_Size; - // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN. - const unsigned GV_Max_Warp_Number; - /// The slot size that should be reserved for a working warp. 
- /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) - const unsigned GV_Warp_Size_Log2_MaskL; + + constexpr unsigned maxWarpNumber() const { + return GV_Max_WG_Size / GV_Warp_Size; + } }; /// For AMDGPU GPUs static constexpr GV AMDGPUGridValues = { - 448, // GV_Threads - 256, // GV_Slot_Size - 64, // GV_Warp_Size - 32, // GV_Warp_Size_32 - 6, // GV_Warp_Size_Log2 - 64 * 256, // GV_Warp_Slot_Size - 128, // GV_Max_Teams - 256, // GV_Mem_Align - 63, // GV_Warp_Size_Log2_Mask - 896, // GV_SimpleBufferSize - 1024, // GV_Max_WG_Size, - 256, // GV_Defaut_WG_Size - 1024 / 64, // GV_Max_WG_Size / GV_WarpSize - 63 // GV_Warp_Size_Log2_MaskL + 448, // GV_Threads + 256, // GV_Slot_Size + 64, // GV_Warp_Size + 128, // GV_Max_Teams + 896, // GV_SimpleBufferSize + 1024, // GV_Max_WG_Size, + 256, // GV_Default_WG_Size }; /// For Nvidia GPUs static constexpr GV NVPTXGridValues = { - 992, // GV_Threads - 256, // GV_Slot_Size - 32, // GV_Warp_Size - 32, // GV_Warp_Size_32 - 5, // GV_Warp_Size_Log2 - 32 * 256, // GV_Warp_Slot_Size - 1024, // GV_Max_Teams - 256, // GV_Mem_Align - (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask - 896, // GV_SimpleBufferSize - 1024, // GV_Max_WG_Size - 128, // GV_Defaut_WG_Size - 1024 / 32, // GV_Max_WG_Size / GV_WarpSize - 31 // GV_Warp_Size_Log2_MaskL + 992, // GV_Threads + 256, // GV_Slot_Size + 32, // GV_Warp_Size + 1024, // GV_Max_Teams + 896, // GV_SimpleBufferSize + 1024, // GV_Max_WG_Size + 128, // GV_Default_WG_Size }; } // namespace omp