diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -87,6 +87,7 @@
 
 __OMP_STRUCT_TYPE(Ident, ident_t, Int32, Int32, Int32, Int32, Int8Ptr)
 __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, Int8Ptr)
+__OMP_STRUCT_TYPE(KernelEnvironmentTy, KernelEnvironmentTy, Ident)
 
 #undef __OMP_STRUCT_TYPE
 #undef OMP_STRUCT_TYPE
@@ -423,8 +424,8 @@
           /* Int */ Int32, /* kmp_task_t */ VoidPtr)
 
 /// OpenMP Device runtime functions
-__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1, Int1)
-__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1)
+__OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentTyPtr, Int8, Int1, Int1)
+__OMP_RTL(__kmpc_target_deinit, false, Void, Int8, Int1)
 __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
 __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
           VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2811,8 +2811,25 @@
   Function *Fn = getOrCreateRuntimeFunctionPtr(
       omp::RuntimeFunction::OMPRTL___kmpc_target_init);
 
-  CallInst *ThreadKind = Builder.CreateCall(
-      Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
+  Function *Kernel = Builder.GetInsertBlock()->getParent();
+  const DataLayout &DL = Fn->getParent()->getDataLayout();
+  Constant *KernelEnvironmentInitializer = ConstantStruct::get(
+      KernelEnvironmentTy, {cast<GlobalVariable>(Ident)->getInitializer()});
+  std::string KernelEnvironmentName =
+      (Kernel->getName() + "_kernel_info").str();
+  GlobalVariable *KernelEnvironment = new GlobalVariable(
+      M, KernelEnvironmentTy, /*IsConstant=*/false,
+      llvm::GlobalValue::ExternalLinkage, KernelEnvironmentInitializer,
+      KernelEnvironmentName,
+      /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
+      DL.getDefaultGlobalsAddressSpace());
+  auto *KernelEnvironmentCasted =
+      ConstantExpr::getPointerBitCastOrAddrSpaceCast(KernelEnvironment,
+                                                     KernelEnvironmentTyPtr);
+
+  CallInst *ThreadKind =
+      Builder.CreateCall(Fn, {KernelEnvironmentCasted, IsSPMDVal,
+                              UseGenericStateMachine, RequiresFullRuntimeVal});
 
   Value *ExecUserCode = Builder.CreateICmpEQ(
       ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
@@ -2851,9 +2868,6 @@
   if (!updateToLocation(Loc))
     return;
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
   ConstantInt *IsSPMDVal = ConstantInt::getSigned(
       IntegerType::getInt8Ty(Int8->getContext()),
       IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
@@ -2863,7 +2877,7 @@
   Function *Fn = getOrCreateRuntimeFunctionPtr(
       omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
 
-  Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
+  Builder.CreateCall(Fn, {IsSPMDVal, RequiresFullRuntimeVal});
 }
 
 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -117,7 +117,7 @@
   ${source_directory}/Workshare.cpp
 )
 
-set(clang_opt_flags -O1 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=2048)
+set(clang_opt_flags -O1 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=2048 -DOMPTARGET_DEVICE_RUNTIME)
 set(link_opt_flags -O1 -openmp-opt-disable)
 
 # Prepend -I to each list element
diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h
--- a/openmp/libomptarget/DeviceRTL/include/Interface.h
+++ b/openmp/libomptarget/DeviceRTL/include/Interface.h
@@ -12,6 +12,7 @@
 #ifndef OMPTARGET_DEVICERTL_INTERFACE_H
 #define OMPTARGET_DEVICERTL_INTERFACE_H
 
+#include "KernelEnvironment.h"
 #include "Types.h"
 
 /// External API
@@ -211,10 +212,10 @@
 ///{
 int8_t __kmpc_is_spmd_exec_mode();
 
-int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
+int32_t __kmpc_target_init(_OMP::KernelEnvironmentTy &KernelEnv, int8_t Mode,
                            bool UseGenericStateMachine, bool);
 
-void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool);
+void __kmpc_target_deinit(int8_t Mode, bool);
 
 ///}
 
diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h
--- a/openmp/libomptarget/DeviceRTL/include/State.h
+++ b/openmp/libomptarget/DeviceRTL/include/State.h
@@ -13,6 +13,7 @@
 #define OMPTARGET_STATE_H
 
 #include "Debug.h"
+#include "KernelEnvironment.h"
 #include "Types.h"
 
 #pragma omp declare target
@@ -24,7 +25,12 @@
 inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE;
 
 /// Initialize the state machinery. Must be called by all threads.
-void init(bool IsSPMD);
+/// The address of \p KernelEnv is retained (see getKernelEnvironment), so it
+/// must refer to storage that stays valid for the whole kernel.
+void init(bool IsSPMD, KernelEnvironmentTy &KernelEnv);
+
+/// Return the kernel environment associated with the current kernel.
+KernelEnvironmentTy &getKernelEnvironment();
 
 /// TODO
 enum ValueKind {
diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
--- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
@@ -12,6 +12,7 @@
 
 #include "Debug.h"
 #include "Interface.h"
+#include "KernelEnvironment.h"
 #include "Mapping.h"
 #include "State.h"
 #include "Synchronization.h"
@@ -21,15 +22,15 @@
 
 #pragma omp declare target
 
-static void inititializeRuntime(bool IsSPMD) {
+static void inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnv) {
   // Order is important here.
   synchronize::init(IsSPMD);
   mapping::init(IsSPMD);
-  state::init(IsSPMD);
+  state::init(IsSPMD, KernelEnv);
 }
 
 /// Simple generic state machine for worker threads.
-static void genericStateMachine(IdentTy *Ident) {
+static void genericStateMachine(IdentTy &Ident) {
   FunctionTracingRAII();
 
   uint32_t TId = mapping::getThreadIdInBlock();
@@ -61,19 +62,20 @@
 
 extern "C" {
 
 /// Initialization
 ///
-/// \param Ident Source location identification, can be NULL.
+/// \param KernelEnv Kernel environment the compiler emitted for this kernel.
+///                  Taken by reference since state::init keeps its address.
 ///
-int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
+int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnv, int8_t Mode,
                            bool UseGenericStateMachine, bool) {
   FunctionTracingRAII();
   const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
   if (IsSPMD) {
-    inititializeRuntime(/* IsSPMD */ true);
+    inititializeRuntime(/* IsSPMD */ true, KernelEnv);
     synchronize::threadsAligned();
   } else {
-    inititializeRuntime(/* IsSPMD */ false);
+    inititializeRuntime(/* IsSPMD */ false, KernelEnv);
     // No need to wait since only the main threads will execute user
     // code and workers will run into a barrier right away.
   }
@@ -87,7 +89,7 @@
     return -1;
 
   if (UseGenericStateMachine)
-    genericStateMachine(Ident);
+    genericStateMachine(KernelEnv.Ident);
 
   return mapping::getThreadIdInBlock();
 }
@@ -96,10 +98,7 @@
 ///
 /// In non-SPMD, this function releases the workers trapped in a state machine
 /// and also any memory dynamically allocated by the runtime.
-///
-/// \param Ident Source location identification, can be NULL.
-///
-void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool) {
+void __kmpc_target_deinit(int8_t Mode, bool) {
   FunctionTracingRAII();
   const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
   state::assumeInitialState(IsSPMD);
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -32,6 +32,9 @@
 extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment)));
 #pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc)
 
+/// The kernel environment passed to the init method by the compiler.
+static KernelEnvironmentTy *SHARED(KernelEnvironmentPtr);
+
 namespace {
 
 /// Fallback implementations are missing to trigger a link time error.
@@ -364,14 +367,20 @@
   __builtin_unreachable();
 }
 
-void state::init(bool IsSPMD) {
+void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnv) {
   SharedMemorySmartStack.init(IsSPMD);
-  if (mapping::isInitialThreadInLevel0(IsSPMD))
+  if (mapping::isInitialThreadInLevel0(IsSPMD)) {
     TeamState.init(IsSPMD);
+    KernelEnvironmentPtr = &KernelEnv;
+  }
 
   ThreadStates[mapping::getThreadIdInBlock()] = nullptr;
 }
 
+KernelEnvironmentTy &state::getKernelEnvironment() {
+  return *KernelEnvironmentPtr;
+}
+
 void state::enterDataEnvironment() {
   unsigned TId = mapping::getThreadIdInBlock();
   ThreadStateTy *NewThreadState =
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -131,6 +131,7 @@
     -fvisibility=default
     -Wno-unused-value
     -nogpulib
+    -DOMPTARGET_OLD_DEVICE_RUNTIME
     -O${optimization_level}
     ${CUDA_DEBUG}
     -I${CMAKE_CURRENT_SOURCE_DIR}/src
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
--- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
@@ -207,7 +207,7 @@
 }
 
 EXTERN
-int32_t __kmpc_target_init(ident_t *Ident, int8_t Mode,
+int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnv, int8_t Mode,
                            bool UseGenericStateMachine,
                            bool RequiresFullRuntime) {
   const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
@@ -218,7 +218,7 @@
     __kmpc_generic_kernel_init();
 
   if (IsSPMD) {
-    __kmpc_barrier_simple_spmd(Ident, TId);
+    __kmpc_barrier_simple_spmd(&KernelEnv.Ident, TId);
     return -1;
   }
 
@@ -226,13 +226,13 @@
     return -1;
 
   if (UseGenericStateMachine)
-    __kmpc_target_region_state_machine(Ident);
+    __kmpc_target_region_state_machine(&KernelEnv.Ident);
 
   return TId;
 }
 
 EXTERN
-void __kmpc_target_deinit(ident_t *Ident, int8_t Mode,
+void __kmpc_target_deinit(int8_t Mode,
                           bool RequiresFullRuntime) {
   const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
   if (IsSPMD)
diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h
--- a/openmp/libomptarget/deviceRTLs/interface.h
+++ b/openmp/libomptarget/deviceRTLs/interface.h
@@ -439,11 +439,12 @@
 EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
                              int32_t cancelVal);
 
+#include "KernelEnvironment.h"
 // non standard
-EXTERN int32_t __kmpc_target_init(ident_t *Ident, int8_t Mode,
+EXTERN int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnv, int8_t Mode,
                                   bool UseGenericStateMachine,
                                   bool RequiresFullRuntime);
-EXTERN void __kmpc_target_deinit(ident_t *Ident, int8_t Mode,
+EXTERN void __kmpc_target_deinit(int8_t Mode,
                                  bool RequiresFullRuntime);
 EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn);
 EXTERN bool __kmpc_kernel_parallel(void **WorkFn);
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -187,7 +187,7 @@
 
 # Generate a Bitcode library for all the compute capabilities the user requested
 foreach(sm ${nvptx_sm_list})
-  set(cuda_flags -Xclang -target-cpu -Xclang sm_${sm} "-D__CUDA_ARCH__=${sm}0")
+  set(cuda_flags -Xclang -target-cpu -Xclang sm_${sm} "-D__CUDA_ARCH__=${sm}0" -DOMPTARGET_OLD_DEVICE_RUNTIME)
   set(bc_files "")
   foreach(src ${cuda_src_files})
     get_filename_component(infile ${src} ABSOLUTE)
diff --git a/openmp/libomptarget/include/KernelEnvironment.h b/openmp/libomptarget/include/KernelEnvironment.h
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/include/KernelEnvironment.h
@@ -0,0 +1,46 @@
+//===---- KernelEnvironment.h - OpenMP GPU kernel environment ----- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Declares KernelEnvironmentTy, the per-kernel environment whose address is
+// passed to __kmpc_target_init.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OMPTARGET_KERNEL_ENVIRONMENT_H
+#define OMPTARGET_KERNEL_ENVIRONMENT_H
+
+// deviceRTL uses <stdint> and DeviceRTL uses explicit definitions
+
+#ifdef OMPTARGET_DEVICE_RUNTIME
+#include "Types.h"
+#else
+#ifdef OMPTARGET_OLD_DEVICE_RUNTIME
+#include "interface.h"
+using IdentTy = ident_t;
+#else
+// NOTE(review): header name reconstructed; it must declare ident_t - confirm.
+#include <SourceInfo.h>
+using IdentTy = ident_t;
+#endif
+#endif
+
+#ifdef OMPTARGET_DEVICE_RUNTIME
+namespace _OMP {
+#endif
+
+/// Per-kernel environment; at present it only holds the kernel's ident
+/// (source location) structure.
+struct KernelEnvironmentTy {
+  IdentTy Ident;
+};
+
+#ifdef OMPTARGET_DEVICE_RUNTIME
+} // namespace _OMP
+#endif
+
+#endif