diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -174,6 +174,7 @@ -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device -Xclang -target-feature -Xclang +ptx61 -I${include_directory} + -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL} ) diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp --- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp @@ -14,16 +14,10 @@ #include "Configuration.h" #include "State.h" #include "Types.h" +#include "device_environment.h" using namespace _OMP; -struct DeviceEnvironmentTy { - uint32_t DebugKind; - uint32_t NumDevices; - uint32_t DeviceNum; - uint64_t DynamicMemSize; -}; - #pragma omp declare target extern uint32_t __omp_rtl_debug_kind; diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt --- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt @@ -92,7 +92,6 @@ ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h ${devicertl_base_directory}/common/debug.h - ${devicertl_base_directory}/common/device_environment.h ${devicertl_base_directory}/common/omptarget.h ${devicertl_base_directory}/common/omptargeti.h ${devicertl_base_directory}/common/state-queue.h @@ -137,6 +136,7 @@ -I${CMAKE_CURRENT_SOURCE_DIR}/src -I${devicertl_base_directory}/common/include -I${devicertl_base_directory} + -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_AMDGCN}) set(bc1_files) diff --git a/openmp/libomptarget/deviceRTLs/common/debug.h b/openmp/libomptarget/deviceRTLs/common/debug.h --- a/openmp/libomptarget/deviceRTLs/common/debug.h +++ 
b/openmp/libomptarget/deviceRTLs/common/debug.h @@ -28,7 +28,6 @@ #ifndef _OMPTARGET_NVPTX_DEBUG_H_ #define _OMPTARGET_NVPTX_DEBUG_H_ -#include "common/device_environment.h" #include "target_interface.h" //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu --- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu @@ -12,7 +12,6 @@ #pragma omp declare target #include "common/allocator.h" -#include "common/device_environment.h" #include "common/omptarget.h" //////////////////////////////////////////////////////////////////////////////// @@ -20,7 +19,7 @@ //////////////////////////////////////////////////////////////////////////////// PLUGIN_ACCESSIBLE -omptarget_device_environmentTy omptarget_device_environment; +DeviceEnvironmentTy omptarget_device_environment; //////////////////////////////////////////////////////////////////////////////// // global data holding OpenMP state information diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -173,6 +173,7 @@ -I${devicertl_base_directory} -I${devicertl_common_directory}/include -I${devicertl_nvptx_directory}/src + -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_NVPTX}) if(${LIBOMPTARGET_NVPTX_DEBUG}) diff --git a/openmp/libomptarget/deviceRTLs/target_interface.h b/openmp/libomptarget/deviceRTLs/target_interface.h --- a/openmp/libomptarget/deviceRTLs/target_interface.h +++ b/openmp/libomptarget/deviceRTLs/target_interface.h @@ -13,6 +13,9 @@ #ifndef _OMPTARGET_TARGET_INTERFACE_H_ #define _OMPTARGET_TARGET_INTERFACE_H_ +#include <stdint.h> + +#include "device_environment.h" #include "target_impl.h" // Calls to the 
NVPTX layer (assuming 1D layout) @@ -70,4 +73,6 @@ // Barrier until num_threads arrive. EXTERN void __kmpc_impl_named_sync(uint32_t num_threads); +extern DeviceEnvironmentTy omptarget_device_environment; + #endif // _OMPTARGET_TARGET_INTERFACE_H_ diff --git a/openmp/libomptarget/deviceRTLs/common/device_environment.h b/openmp/libomptarget/include/device_environment.h rename from openmp/libomptarget/deviceRTLs/common/device_environment.h rename to openmp/libomptarget/include/device_environment.h --- a/openmp/libomptarget/deviceRTLs/common/device_environment.h +++ b/openmp/libomptarget/include/device_environment.h @@ -13,14 +13,13 @@ #ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_ #define _OMPTARGET_DEVICE_ENVIRONMENT_H_ -#include "target_impl.h" +// deviceRTL uses <stdint> and DeviceRTL uses explicit definitions -struct omptarget_device_environmentTy { - int32_t debug_level; - uint32_t num_devices; - uint32_t device_num; +struct DeviceEnvironmentTy { + uint32_t DebugKind; + uint32_t NumDevices; + uint32_t DeviceNum; + uint32_t DynamicMemSize; }; -extern omptarget_device_environmentTy omptarget_device_environment; - #endif diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -30,6 +30,7 @@ #include "internal.h" #include "rt.h" +#include "device_environment.h" #include "get_elf_mach_gfx_name.h" #include "omptargetplugin.h" #include "print_tracing.h" @@ -802,14 +803,6 @@ pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER; -// TODO: May need to drop the trailing to fields until deviceRTL is updated -struct omptarget_device_environmentTy { - int32_t debug_level; // gets value of envvar LIBOMPTARGET_DEVICE_RTL_DEBUG - // only useful for Debug build of deviceRTLs - int32_t num_devices; // gets number of active offload devices - int32_t device_num; // gets a value 0 to num_devices-1 -}; - static RTLDeviceInfoTy 
DeviceInfo; namespace { @@ -1300,15 +1293,12 @@ } struct device_environment { - // initialise an omptarget_device_environmentTy in the deviceRTL + // initialise a DeviceEnvironmentTy in the deviceRTL // patches around differences in the deviceRTL between trunk, aomp, // rocmcc. Over time these differences will tend to zero and this class // simplified. - // Symbol may be in .data or .bss, and may be missing fields: - // - aomp has debug_level, num_devices, device_num - // - trunk has debug_level - // - under review in trunk is debug_level, device_num - // - rocmcc matches aomp, patch to swap num_devices and device_num + // Symbol may be in .data or .bss, and may be missing fields, todo: + // review aomp/trunk/rocm and simplify the following // The symbol may also have been deadstripped because the device side // accessors were unused. @@ -1318,7 +1308,7 @@ // gpu (trunk) and initialize after loading. const char *sym() { return "omptarget_device_environment"; } - omptarget_device_environmentTy host_device_env; + DeviceEnvironmentTy host_device_env; symbol_info si; bool valid = false; @@ -1329,12 +1319,13 @@ __tgt_device_image *image, const size_t img_size) : image(image), img_size(img_size) { - host_device_env.num_devices = number_devices; - host_device_env.device_num = device_id; - host_device_env.debug_level = 0; + host_device_env.NumDevices = number_devices; + host_device_env.DeviceNum = device_id; + host_device_env.DebugKind = 0; + host_device_env.DynamicMemSize = 0; #ifdef OMPTARGET_DEBUG if (char *envStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) { - host_device_env.debug_level = std::stoi(envStr); + host_device_env.DebugKind = std::stoi(envStr); } #endif @@ -1374,7 +1365,7 @@ if (!in_image()) { DP("Setting global device environment after load (%u bytes)\n", si.size); - int device_id = host_device_env.device_num; + int device_id = host_device_env.DeviceNum; auto &SymbolInfo = DeviceInfo.SymbolInfoTable[device_id]; void *state_ptr; uint32_t state_ptr_size; @@ 
-1430,9 +1421,9 @@ // This function loads the device image onto gpu[device_id] and does other // per-image initialization work. Specifically: // - // - Initialize an omptarget_device_environmentTy instance embedded in the + // - Initialize a DeviceEnvironmentTy instance embedded in the // image at the symbol "omptarget_device_environment" - // Fields debug_level, device_num, num_devices. Used by the deviceRTL. + // Fields DebugKind, DeviceNum, NumDevices. Used by the deviceRTL. // // - Allocate a large array per-gpu (could be moved to init_device) // - Read a uint64_t at symbol omptarget_nvptx_device_State_size diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -21,6 +21,7 @@ #include <vector> #include "Debug.h" +#include "device_environment.h" #include "omptargetplugin.h" #define TARGET_NAME CUDA @@ -87,16 +88,6 @@ : Func(_Func), ExecutionMode(_ExecutionMode) {} }; -/// Device environment data -/// Manually sync with the deviceRTL side for now, move to a dedicated header -/// file later. -struct omptarget_device_environmentTy { - int32_t debug_level; - uint32_t num_devices; - uint32_t device_num; - uint64_t dynamic_shared_size; -}; - namespace { bool checkResult(CUresult Err, const char *ErrMsg) { if (Err == CUDA_SUCCESS) @@ -897,9 +888,9 @@ // send device environment data to the device { // TODO: The device ID used here is not the real device ID used by OpenMP. - omptarget_device_environmentTy DeviceEnv{ - 0, static_cast<uint32_t>(NumberOfDevices), - static_cast<uint32_t>(DeviceId), DynamicMemorySize}; + DeviceEnvironmentTy DeviceEnv{0, static_cast<uint32_t>(NumberOfDevices), + static_cast<uint32_t>(DeviceId), + static_cast<uint32_t>(DynamicMemorySize)}; #ifdef OMPTARGET_DEBUG if (const char *EnvStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG"))