diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h deleted file mode 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h +++ /dev/null @@ -1,40 +0,0 @@ -//===--- CGOpenMPRuntimeAMDGCN.h - Interface to OpenMP AMDGCN Runtimes ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to -// AMDGCN targets from generalized CGOpenMPRuntimeGPU class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H -#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H - -#include "CGOpenMPRuntime.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/StmtOpenMP.h" - -namespace clang { -namespace CodeGen { - -class CGOpenMPRuntimeAMDGCN final : public CGOpenMPRuntimeGPU { - -public: - explicit CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM); - - /// Get the GPU warp size. - llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override; - - /// Get the id of the current thread on the GPU. - llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; -}; - -} // namespace CodeGen -} // namespace clang - -#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp deleted file mode 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp +++ /dev/null @@ -1,48 +0,0 @@ -//===-- CGOpenMPRuntimeAMDGCN.cpp - Interface to OpenMP AMDGCN Runtimes --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to -// AMDGCN targets from generalized CGOpenMPRuntimeGPU class. -// -//===----------------------------------------------------------------------===// - -#include "CGOpenMPRuntimeAMDGCN.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/Attr.h" -#include "clang/AST/DeclOpenMP.h" -#include "clang/AST/StmtOpenMP.h" -#include "clang/AST/StmtVisitor.h" -#include "clang/Basic/Cuda.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Frontend/OpenMP/OMPGridValues.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" - -using namespace clang; -using namespace CodeGen; -using namespace llvm::omp; - -CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM) - : CGOpenMPRuntimeGPU(CGM) { - if (!CGM.getLangOpts().OpenMPIsDevice) - llvm_unreachable("OpenMP AMDGCN can only handle device code."); -} - -llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - // return constant compile-time target-specific warp size - unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size; - return Bld.getInt32(WarpSize); -} - -llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Function *F = - CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x); - return Bld.CreateCall(F, llvm::None, "nvptx_tid"); -} diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -176,10 +176,10 @@ /// and NVPTX. /// Get the GPU warp size. - virtual llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) = 0; + llvm::Value *getGPUWarpSize(CodeGenFunction &CGF); /// Get the id of the current thread on the GPU. 
- virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0; + llvm::Value *getGPUThreadID(CodeGenFunction &CGF); /// Get the maximum number of threads in a block of the GPU. llvm::Value *getGPUNumThreads(CodeGenFunction &CGF); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "CGOpenMPRuntimeGPU.h" -#include "CGOpenMPRuntimeNVPTX.h" #include "CodeGenFunction.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclOpenMP.h" @@ -21,6 +20,7 @@ #include "clang/Basic/Cuda.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/Support/MathExtras.h" @@ -1197,7 +1197,7 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) - llvm_unreachable("OpenMP NVPTX can only handle device code."); + llvm_unreachable("OpenMP can only handle device code."); llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder(); if (CGM.getLangOpts().OpenMPTargetNewRuntime) { @@ -3960,3 +3960,17 @@ } return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); } + +llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) { + ArrayRef<llvm::Value *> Args{}; + return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block), + Args); +} + +llvm::Value *CGOpenMPRuntimeGPU::getGPUWarpSize(CodeGenFunction &CGF) { + ArrayRef<llvm::Value *> Args{}; + return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_warp_size), + Args); +} diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h deleted file mode 100644 --- 
a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ /dev/null @@ -1,40 +0,0 @@ -//===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to NVPTX -// targets from generalized CGOpenMPRuntimeGPU class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H -#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H - -#include "CGOpenMPRuntime.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/StmtOpenMP.h" - -namespace clang { -namespace CodeGen { - -class CGOpenMPRuntimeNVPTX final : public CGOpenMPRuntimeGPU { - -public: - explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); - - /// Get the GPU warp size. - llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override; - - /// Get the id of the current thread on the GPU. - llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; -}; - -} // CodeGen namespace. -} // clang namespace. - -#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp deleted file mode 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ /dev/null @@ -1,48 +0,0 @@ -//===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This provides a class for OpenMP runtime code generation specialized to NVPTX -// targets from generalized CGOpenMPRuntimeGPU class. -// -//===----------------------------------------------------------------------===// - -#include "CGOpenMPRuntimeNVPTX.h" -#include "CGOpenMPRuntimeGPU.h" -#include "CodeGenFunction.h" -#include "clang/AST/Attr.h" -#include "clang/AST/DeclOpenMP.h" -#include "clang/AST/StmtOpenMP.h" -#include "clang/AST/StmtVisitor.h" -#include "clang/Basic/Cuda.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/IR/IntrinsicsNVPTX.h" - -using namespace clang; -using namespace CodeGen; -using namespace llvm::omp; - -CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) - : CGOpenMPRuntimeGPU(CGM) { - if (!CGM.getLangOpts().OpenMPIsDevice) - llvm_unreachable("OpenMP NVPTX can only handle device code."); -} - -llvm::Value *CGOpenMPRuntimeNVPTX::getGPUWarpSize(CodeGenFunction &CGF) { - return CGF.EmitRuntimeCall( - llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), - "nvptx_warp_size"); -} - -llvm::Value *CGOpenMPRuntimeNVPTX::getGPUThreadID(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Function *F; - F = llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x); - return Bld.CreateCall(F, llvm::None, "nvptx_tid"); -} diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -59,9 +59,7 @@ CGObjCRuntime.cpp CGOpenCLRuntime.cpp CGOpenMPRuntime.cpp - CGOpenMPRuntimeAMDGCN.cpp CGOpenMPRuntimeGPU.cpp - CGOpenMPRuntimeNVPTX.cpp CGRecordLayoutBuilder.cpp CGStmt.cpp CGStmtOpenMP.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- 
a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -19,8 +19,7 @@ #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" -#include "CGOpenMPRuntimeAMDGCN.h" -#include "CGOpenMPRuntimeNVPTX.h" +#include "CGOpenMPRuntimeGPU.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" #include "ConstantEmitter.h" @@ -244,14 +243,10 @@ switch (getTriple().getArch()) { case llvm::Triple::nvptx: case llvm::Triple::nvptx64: - assert(getLangOpts().OpenMPIsDevice && - "OpenMP NVPTX is only prepared to deal with device code."); - OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this)); - break; case llvm::Triple::amdgcn: assert(getLangOpts().OpenMPIsDevice && - "OpenMP AMDGCN is only prepared to deal with device code."); - OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this)); + "OpenMP AMDGPU/NVPTX is only prepared to deal with device code."); + OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this)); break; default: if (LangOpts.OpenMPSimd) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -455,6 +455,8 @@ __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) +__OMP_RTL(__kmpc_get_warp_size, false, Int32, ) + __OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32) __OMP_RTL(__last, false, Void, ) diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -277,5 +277,10 @@ FunctionTracingRAII(); return impl::getNumHardwareThreadsInBlock(); } + +__attribute__((noinline)) uint32_t __kmpc_get_warp_size() { + FunctionTracingRAII(); + return impl::getWarpSize(); +} } #pragma omp end declare target