diff --git a/llvm/include/llvm/Frontend/OpenMP/ConfigurationEnvironment.h b/llvm/include/llvm/Frontend/OpenMP/ConfigurationEnvironment.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Frontend/OpenMP/ConfigurationEnvironment.h
@@ -0,0 +1,40 @@
+//===- ConfigurationEnvironment.h - OpenMP GPU config environment - C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines ConfigurationEnvironmentTy, the kernel configuration (execution mode
+// and generic state machine choice) that __kmpc_target_init reads.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FRONTEND_OPENMP_CONFIGURATIONENVIRONMENT_H
+#define LLVM_FRONTEND_OPENMP_CONFIGURATIONENVIRONMENT_H
+
+// Environment.h pulls in the runtime-specific type header: Types.h for the new
+// DeviceRTL, interface.h for the old deviceRTL, and SourceInfo.h otherwise.
+
+#include "Environment.h"
+#include "OMPConstants.h"
+
+#ifdef OMPTARGET_DEVICE_RUNTIME
+namespace _OMP {
+#endif
+
+/// Configuration of a target kernel, consumed by __kmpc_target_init.
+struct ConfigurationEnvironmentTy {
+  /// Non-zero if worker threads may enter the generic state machine.
+  uint8_t UseGenericStateMachine;
+
+  /// Execution mode flags; tested against OMP_TGT_EXEC_MODE_SPMD.
+  llvm::omp::OMPTgtExecModeFlags ExecMode;
+};
+
+#ifdef OMPTARGET_DEVICE_RUNTIME
+} // namespace _OMP
+#endif
+
+#endif // LLVM_FRONTEND_OPENMP_CONFIGURATIONENVIRONMENT_H
diff --git a/llvm/include/llvm/Frontend/OpenMP/Environment.h b/llvm/include/llvm/Frontend/OpenMP/Environment.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Frontend/OpenMP/Environment.h
@@ -0,0 +1,28 @@
+//===- Environment.h - OpenMP GPU environment helper declarations - C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Makes IdentTy available to whichever OpenMP GPU runtime includes this file.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FRONTEND_OPENMP_ENVIRONMENT_H
+#define LLVM_FRONTEND_OPENMP_ENVIRONMENT_H
+
+// Types.h: new DeviceRTL; interface.h: old deviceRTL; SourceInfo.h: otherwise.
+
+#ifdef OMPTARGET_DEVICE_RUNTIME
+#include "Types.h"
+#else
+#ifdef OMPTARGET_OLD_DEVICE_RUNTIME
+#include "interface.h"
+using IdentTy = ident_t;
+#else
+#include "SourceInfo.h"
+using IdentTy = ident_t;
+#endif
+#endif
+
+#endif // LLVM_FRONTEND_OPENMP_ENVIRONMENT_H
diff --git a/llvm/include/llvm/Frontend/OpenMP/KernelEnvironment.h b/llvm/include/llvm/Frontend/OpenMP/KernelEnvironment.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Frontend/OpenMP/KernelEnvironment.h
@@ -0,0 +1,44 @@
+//===---- KernelEnvironment.h - OpenMP GPU kernel environment ----- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines KernelEnvironmentTy, the per-kernel environment that is passed by
+// reference to __kmpc_target_init.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FRONTEND_OPENMP_KERNELENVIRONMENT_H
+#define LLVM_FRONTEND_OPENMP_KERNELENVIRONMENT_H
+
+// Shared by the old deviceRTL and the new DeviceRTL. The types live in
+// namespace _OMP when OMPTARGET_DEVICE_RUNTIME is defined.
+
+#include "ConfigurationEnvironment.h"
+#include "Environment.h"
+
+#ifdef OMPTARGET_DEVICE_RUNTIME
+namespace _OMP {
+#endif
+
+/// Environment of a target kernel, handed to __kmpc_target_init.
+struct KernelEnvironmentTy {
+  /// Source location identification of the kernel.
+  IdentTy Ident;
+
+  /// Execution mode and state machine configuration of the kernel.
+  ConfigurationEnvironmentTy Configuration;
+
+  /// Current indentation level for the function trace. Only accessed by thread
+  /// 0.
+  uint16_t DebugIndentionLevel;
+};
+
+#ifdef OMPTARGET_DEVICE_RUNTIME
+} // namespace _OMP
+#endif
+
+#endif // LLVM_FRONTEND_OPENMP_KERNELENVIRONMENT_H
diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h
--- a/openmp/libomptarget/DeviceRTL/include/Interface.h
+++ b/openmp/libomptarget/DeviceRTL/include/Interface.h
@@ -13,6 +13,7 @@
 #define OMPTARGET_DEVICERTL_INTERFACE_H
 
 #include "Types.h"
+#include "llvm/Frontend/OpenMP/KernelEnvironment.h"
 
 /// External API
 ///
@@ -214,10 +215,9 @@
 ///{
 
 int8_t __kmpc_is_spmd_exec_mode();
 
-int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
-                           bool UseGenericStateMachine, bool);
+int32_t __kmpc_target_init(_OMP::KernelEnvironmentTy &KernelEnv, bool);
 
-void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool);
+void __kmpc_target_deinit(bool);
 
 ///}
diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h
--- a/openmp/libomptarget/DeviceRTL/include/State.h
+++ b/openmp/libomptarget/DeviceRTL/include/State.h
@@ -14,6 +14,7 @@
 
 #include "Debug.h"
 #include "Types.h"
+#include "llvm/Frontend/OpenMP/KernelEnvironment.h"
 
 #pragma omp declare target
 
@@ -24,7 +25,10 @@
 inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE;
 
 /// Initialize the state machinery. Must be called by all threads.
-void init(bool IsSPMD);
+void init(bool IsSPMD, KernelEnvironmentTy &KernelEnv);
+
+/// Return the kernel environment associated with the current kernel.
+KernelEnvironmentTy &getKernelEnvironment();
 
 /// TODO
 enum ValueKind {
diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp
--- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp
@@ -14,6 +14,7 @@
 #include "Configuration.h"
 #include "Interface.h"
 #include "Mapping.h"
+#include "State.h"
 #include "Types.h"
 
 using namespace _OMP;
@@ -54,15 +55,11 @@
 }
 }
 
-/// Current indentation level for the function trace.
Only accessed by thread 0.
-__attribute__((loader_uninitialized))
-static uint32_t Level;
-#pragma omp allocate(Level) allocator(omp_pteam_mem_alloc)
-
 DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line,
                                const char *Function) {
   if (config::isDebugMode(config::DebugKind::FunctionTracing) &&
       mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) {
+    uint16_t &Level = state::getKernelEnvironment().DebugIndentionLevel;
 
     for (int I = 0; I < Level; ++I)
       PRINTF("%s", " ");
@@ -75,8 +72,15 @@
 
 DebugEntryRAII::~DebugEntryRAII() {
   if (config::isDebugMode(config::DebugKind::FunctionTracing) &&
-      mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0)
+      mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) {
+    // Bind by reference so the decrement updates the kernel environment.
+    uint16_t &Level = state::getKernelEnvironment().DebugIndentionLevel;
     Level--;
+  }
 }
 
-void DebugEntryRAII::init() { Level = 0; }
+void DebugEntryRAII::init() {
+  // The file-scope counter is gone; reset the level stored in the kernel
+  // environment instead.
+  state::getKernelEnvironment().DebugIndentionLevel = 0;
+}
diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
--- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
@@ -16,20 +16,21 @@
 #include "State.h"
 #include "Synchronization.h"
 #include "Types.h"
+#include "llvm/Frontend/OpenMP/KernelEnvironment.h"
 
 using namespace _OMP;
 
 #pragma omp declare target
 
-static void inititializeRuntime(bool IsSPMD) {
+static void inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnv) {
   // Order is important here.
   synchronize::init(IsSPMD);
   mapping::init(IsSPMD);
-  state::init(IsSPMD);
+  state::init(IsSPMD, KernelEnv);
 }
 
 /// Simple generic state machine for worker threads.
-static void genericStateMachine(IdentTy *Ident) {
+static void genericStateMachine(IdentTy &Ident) {
   FunctionTracingRAII();
 
   uint32_t TId = mapping::getThreadIdInBlock();
@@ -61,19 +62,19 @@
 
 extern "C" {
 
 /// Initialization
 ///
-/// \param Ident Source location identification, can be NULL.
+/// \param KernelEnv Source location and configuration of the launched kernel.
 ///
-int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
-                           bool UseGenericStateMachine, bool) {
+int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnv, bool) {
   FunctionTracingRAII();
-  const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
+  const bool IsSPMD = KernelEnv.Configuration.ExecMode & OMP_TGT_EXEC_MODE_SPMD;
+  bool UseGenericStateMachine = KernelEnv.Configuration.UseGenericStateMachine;
   if (IsSPMD) {
-    inititializeRuntime(/* IsSPMD */ true);
+    inititializeRuntime(/* IsSPMD */ true, KernelEnv);
     synchronize::threadsAligned();
   } else {
-    inititializeRuntime(/* IsSPMD */ false);
+    inititializeRuntime(/* IsSPMD */ false, KernelEnv);
     // No need to wait since only the main threads will execute user
     // code and workers will run into a barrier right away.
   }
@@ -101,7 +102,7 @@
   // thread's warp, so none of its threads can ever be active worker threads.
   if (UseGenericStateMachine &&
       mapping::getThreadIdInBlock() < mapping::getBlockSize())
-    genericStateMachine(Ident);
+    genericStateMachine(KernelEnv.Ident);
 
   return mapping::getThreadIdInBlock();
 }
@@ -110,12 +111,9 @@
 ///
 /// In non-SPMD, this function releases the workers trapped in a state machine
 /// and also any memory dynamically allocated by the runtime.
-///
-/// \param Ident Source location identification, can be NULL.
-///
-void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool) {
+void __kmpc_target_deinit(bool) {
   FunctionTracingRAII();
-  const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
+  const bool IsSPMD = mapping::isSPMDMode();
   state::assumeInitialState(IsSPMD);
   if (IsSPMD)
     return;
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -34,6 +34,9 @@
 extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment)));
 #pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc)
 
+/// The kernel environment passed to the init method by the compiler.
+static KernelEnvironmentTy *SHARED(KernelEnvironmentPtr);
+
 namespace {
 
 /// Fallback implementations are missing to trigger a link time error.
@@ -366,16 +369,24 @@
   __builtin_unreachable();
 }
 
-void state::init(bool IsSPMD) {
+void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnv) {
   SharedMemorySmartStack.init(IsSPMD);
   if (mapping::isInitialThreadInLevel0(IsSPMD)) {
     TeamState.init(IsSPMD);
+    // Publish the kernel environment first: DebugEntryRAII::init() resets the
+    // indentation level that is stored inside it.
+    KernelEnvironmentPtr = &KernelEnv;
     DebugEntryRAII::init();
   }
 
   ThreadStates[mapping::getThreadIdInBlock()] = nullptr;
 }
 
+/// Only valid once state::init() has stored the pointer on the initial thread.
+KernelEnvironmentTy &state::getKernelEnvironment() {
+  return *KernelEnvironmentPtr;
+}
+
 void state::enterDataEnvironment() {
   unsigned TId = mapping::getThreadIdInBlock();
   ThreadStateTy *NewThreadState =
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
--- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
@@ -208,10 +208,11 @@
 }
 
 EXTERN
-int32_t __kmpc_target_init(ident_t *Ident, int8_t Mode,
-                           bool UseGenericStateMachine,
+int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnv,
                            bool RequiresFullRuntime) {
-  const bool IsSPMD = Mode & llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
+  const bool IsSPMD =
+      KernelEnv.Configuration.ExecMode & llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
+  bool UseGenericStateMachine = KernelEnv.Configuration.UseGenericStateMachine;
   int TId = __kmpc_get_hardware_thread_id_in_block();
   if (IsSPMD)
     __kmpc_spmd_kernel_init(RequiresFullRuntime);
@@ -219,7 +220,7 @@
     __kmpc_generic_kernel_init();
 
   if (IsSPMD) {
-    __kmpc_barrier_simple_spmd(Ident, TId);
+    __kmpc_barrier_simple_spmd(&KernelEnv.Ident, TId);
     return -1;
   }
 
@@ -242,15 +243,14 @@
   // main thread's warp, so none of its threads can ever be active worker
   // threads.
   if (UseGenericStateMachine && TId < GetNumberOfWorkersInTeam())
-    __kmpc_target_region_state_machine(Ident);
+    __kmpc_target_region_state_machine(&KernelEnv.Ident);
 
   return TId;
 }
 
 EXTERN
-void __kmpc_target_deinit(ident_t *Ident, int8_t Mode,
-                          bool RequiresFullRuntime) {
-  const bool IsSPMD = Mode & llvm::omp::OMP_TGT_EXEC_MODE_SPMD;
+void __kmpc_target_deinit(bool RequiresFullRuntime) {
+  const bool IsSPMD = __kmpc_is_spmd_exec_mode();
   if (IsSPMD)
     __kmpc_spmd_kernel_deinit(RequiresFullRuntime);
   else
diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h
--- a/openmp/libomptarget/deviceRTLs/interface.h
+++ b/openmp/libomptarget/deviceRTLs/interface.h
@@ -437,12 +437,11 @@
 EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
                              int32_t cancelVal);
 
+#include "llvm/Frontend/OpenMP/KernelEnvironment.h"
 // non standard
-EXTERN int32_t __kmpc_target_init(ident_t *Ident, int8_t Mode,
-                                  bool UseGenericStateMachine,
+EXTERN int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnv,
                                   bool RequiresFullRuntime);
-EXTERN void __kmpc_target_deinit(ident_t *Ident, int8_t Mode,
-                                 bool RequiresFullRuntime);
+EXTERN void __kmpc_target_deinit(bool RequiresFullRuntime);
 EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
                                            int32_t NumThreadsClause);
 EXTERN bool __kmpc_kernel_parallel(void **WorkFn);