diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h deleted file mode 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h +++ /dev/null @@ -1,40 +0,0 @@ -//===--- CGOpenMPRuntimeAMDGCN.h - Interface to OpenMP AMDGCN Runtimes ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to -// AMDGCN targets from generalized CGOpenMPRuntimeGPU class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H -#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H - -#include "CGOpenMPRuntime.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/StmtOpenMP.h" - -namespace clang { -namespace CodeGen { - -class CGOpenMPRuntimeAMDGCN final : public CGOpenMPRuntimeGPU { - -public: - explicit CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM); - - /// Get the GPU warp size. - llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override; - - /// Get the id of the current thread on the GPU. - llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; -}; - -} // namespace CodeGen -} // namespace clang - -#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp deleted file mode 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp +++ /dev/null @@ -1,48 +0,0 @@ -//===-- CGOpenMPRuntimeAMDGCN.cpp - Interface to OpenMP AMDGCN Runtimes --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to -// AMDGCN targets from generalized CGOpenMPRuntimeGPU class. -// -//===----------------------------------------------------------------------===// - -#include "CGOpenMPRuntimeAMDGCN.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/Attr.h" -#include "clang/AST/DeclOpenMP.h" -#include "clang/AST/StmtOpenMP.h" -#include "clang/AST/StmtVisitor.h" -#include "clang/Basic/Cuda.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Frontend/OpenMP/OMPGridValues.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" - -using namespace clang; -using namespace CodeGen; -using namespace llvm::omp; - -CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM) - : CGOpenMPRuntimeGPU(CGM) { - if (!CGM.getLangOpts().OpenMPIsDevice) - llvm_unreachable("OpenMP AMDGCN can only handle device code."); -} - -llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - // return constant compile-time target-specific warp size - unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size; - return Bld.getInt32(WarpSize); -} - -llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Function *F = - CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x); - return Bld.CreateCall(F, llvm::None, "nvptx_tid"); -} diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -176,10 +176,10 @@ /// and NVPTX. /// Get the GPU warp size. - virtual llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) = 0; + llvm::Value *getGPUWarpSize(CodeGenFunction &CGF); /// Get the id of the current thread on the GPU. - virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0; + llvm::Value *getGPUThreadID(CodeGenFunction &CGF); /// Get the maximum number of threads in a block of the GPU. llvm::Value *getGPUNumThreads(CodeGenFunction &CGF); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "CGOpenMPRuntimeGPU.h" -#include "CGOpenMPRuntimeNVPTX.h" #include "CodeGenFunction.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclOpenMP.h" @@ -21,7 +20,6 @@ #include "clang/Basic/Cuda.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" -#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/Support/MathExtras.h" using namespace clang; @@ -1197,7 +1195,7 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) - llvm_unreachable("OpenMP NVPTX can only handle device code."); + llvm_unreachable("OpenMP can only handle device code."); llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder(); if (CGM.getLangOpts().OpenMPTargetNewRuntime) { @@ -3960,3 +3958,17 @@ } return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); } + +llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) { + ArrayRef Args{}; + return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block), + Args); +} + +llvm::Value *CGOpenMPRuntimeGPU::getGPUWarpSize(CodeGenFunction &CGF) { + ArrayRef Args{}; + return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_warp_size), + Args); +} diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h deleted file mode 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ /dev/null @@ -1,40 +0,0 @@ -//===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to NVPTX -// targets from generalized CGOpenMPRuntimeGPU class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H -#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H - -#include "CGOpenMPRuntime.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/StmtOpenMP.h" - -namespace clang { -namespace CodeGen { - -class CGOpenMPRuntimeNVPTX final : public CGOpenMPRuntimeGPU { - -public: - explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); - - /// Get the GPU warp size. - llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override; - - /// Get the id of the current thread on the GPU. - llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; -}; - -} // CodeGen namespace. -} // clang namespace. - -#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp deleted file mode 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ /dev/null @@ -1,48 +0,0 @@ -//===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to NVPTX -// targets from generalized CGOpenMPRuntimeGPU class. -// -//===----------------------------------------------------------------------===// - -#include "CGOpenMPRuntimeNVPTX.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/Attr.h" -#include "clang/AST/DeclOpenMP.h" -#include "clang/AST/StmtOpenMP.h" -#include "clang/AST/StmtVisitor.h" -#include "clang/Basic/Cuda.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/IR/IntrinsicsNVPTX.h" - -using namespace clang; -using namespace CodeGen; -using namespace llvm::omp; - -CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) - : CGOpenMPRuntimeGPU(CGM) { - if (!CGM.getLangOpts().OpenMPIsDevice) - llvm_unreachable("OpenMP NVPTX can only handle device code."); -} - -llvm::Value *CGOpenMPRuntimeNVPTX::getGPUWarpSize(CodeGenFunction &CGF) { - return CGF.EmitRuntimeCall( - llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), - "nvptx_warp_size"); -} - -llvm::Value *CGOpenMPRuntimeNVPTX::getGPUThreadID(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Function *F; - F = llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x); - return Bld.CreateCall(F, llvm::None, "nvptx_tid"); -} diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -59,9 +59,7 @@ CGObjCRuntime.cpp CGOpenCLRuntime.cpp CGOpenMPRuntime.cpp - CGOpenMPRuntimeAMDGCN.cpp CGOpenMPRuntimeGPU.cpp - CGOpenMPRuntimeNVPTX.cpp CGRecordLayoutBuilder.cpp CGStmt.cpp CGStmtOpenMP.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -19,8 +19,7 @@ #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" -#include "CGOpenMPRuntimeAMDGCN.h" -#include "CGOpenMPRuntimeNVPTX.h" +#include "CGOpenMPRuntimeGPU.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" #include "ConstantEmitter.h" @@ -244,14 +243,10 @@ switch (getTriple().getArch()) { case llvm::Triple::nvptx: case llvm::Triple::nvptx64: - assert(getLangOpts().OpenMPIsDevice && - "OpenMP NVPTX is only prepared to deal with device code."); - OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this)); - break; case llvm::Triple::amdgcn: assert(getLangOpts().OpenMPIsDevice && - "OpenMP AMDGCN is only prepared to deal with device code."); - OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this)); + "OpenMP AMDGPU/NVPTX is only prepared to deal with device code."); + OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this)); break; default: if (LangOpts.OpenMPSimd) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -455,6 +455,8 @@ __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__kmpc_get_warp_size, false, Int32, ) + __OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32) __OMP_RTL(__last, false, Void, ) diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -277,5 +277,10 @@ FunctionTracingRAII(); return impl::getNumHardwareThreadsInBlock(); } + +__attribute__((noinline)) uint32_t __kmpc_get_warp_size() { + FunctionTracingRAII(); + return impl::getWarpSize(); +} } #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/Utils.cpp b/openmp/libomptarget/DeviceRTL/src/Utils.cpp --- a/openmp/libomptarget/DeviceRTL/src/Utils.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Utils.cpp @@ -24,6 +24,7 @@ __attribute__((used, weak, optnone)) void keepAlive() { __kmpc_get_hardware_thread_id_in_block(); __kmpc_get_hardware_num_threads_in_block(); + __kmpc_get_warp_size(); __kmpc_barrier_simple_spmd(nullptr, 0); __kmpc_barrier_simple_generic(nullptr, 0); } diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip @@ -133,8 +133,11 @@ __builtin_amdgcn_workgroup_size_x()); } +uint32_t __kmpc_get_warp_size() { + return WARPSIZE; +} + EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; } -EXTERN unsigned GetWarpSize() { return WARPSIZE; } EXTERN unsigned GetLaneId() { return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); } diff --git a/openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h b/openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h --- a/openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h +++ b/openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h @@ -35,7 +35,7 @@ ///{ extern "C" { unsigned GetLaneId(); -unsigned GetWarpSize(); +unsigned __kmpc_get_warp_size(); void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi); uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi); } @@ -60,7 +60,7 @@ inline int32_t __kmpc_impl_shfl_sync(uint64_t Mask, int32_t Var, int32_t SrcLane) { - int Width = GetWarpSize(); + int Width = __kmpc_get_warp_size(); int Self = GetLaneId(); int Index = SrcLane + (Self & ~(Width - 1)); return __builtin_amdgcn_ds_bpermute(Index << 2, Var); @@ -90,7 +90,7 @@ inline int32_t __kmpc_impl_shfl_down_sync(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) { - int32_t T = ((GetWarpSize() - Width) << 8) | 0x1f; + int32_t T = ((__kmpc_get_warp_size() - Width) << 8) | 0x1f; return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T); } diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu @@ -105,11 +105,14 @@ EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; } -EXTERN unsigned GetWarpSize() { return WARPSIZE; } EXTERN unsigned GetLaneId() { return __kmpc_get_hardware_thread_id_in_block() & (WARPSIZE - 1); } +unsigned __kmpc_get_warp_size() { + return WARPSIZE; +} + // Atomics uint32_t __kmpc_atomic_add(uint32_t *Address, uint32_t Val) { return __atomic_fetch_add(Address, Val, __ATOMIC_SEQ_CST);