diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt --- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt @@ -76,7 +76,6 @@ ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_atomics.h ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h ${devicertl_base_directory}/common/debug.h - ${devicertl_base_directory}/common/device_environment.h ${devicertl_base_directory}/common/omptarget.h ${devicertl_base_directory}/common/omptargeti.h ${devicertl_base_directory}/common/state-queue.h @@ -110,7 +109,8 @@ --cuda-gpu-arch=${mcpu} ${CUDA_DEBUG} -I${CMAKE_CURRENT_SOURCE_DIR}/src - -I${devicertl_base_directory}) + -I${devicertl_base_directory} + -I${devicertl_base_directory}/../include) set(bc1_files) diff --git a/openmp/libomptarget/deviceRTLs/common/debug.h b/openmp/libomptarget/deviceRTLs/common/debug.h --- a/openmp/libomptarget/deviceRTLs/common/debug.h +++ b/openmp/libomptarget/deviceRTLs/common/debug.h @@ -28,7 +28,7 @@ #ifndef _OMPTARGET_NVPTX_DEBUG_H_ #define _OMPTARGET_NVPTX_DEBUG_H_ -#include "common/device_environment.h" +#include "device_environment.h" //////////////////////////////////////////////////////////////////////////////// // set desired level of debugging diff --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h --- a/openmp/libomptarget/deviceRTLs/common/omptarget.h +++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h @@ -293,6 +293,7 @@ extern DEVICE omptarget_nvptx_SimpleMemoryManager omptarget_nvptx_simpleMemoryManager; +extern DEVICE omptarget_device_environmentTy omptarget_device_environment; extern DEVICE SHARED uint32_t usedMemIdx; extern DEVICE SHARED uint32_t usedSlotIdx; extern DEVICE SHARED uint8_t diff --git a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu --- a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu @@ -13,6 +13,7 @@ #include "common/omptarget.h" #include "common/target_atomic.h" +#include "device_environment.h" #include "target_impl.h" EXTERN double omp_get_wtick(void) { @@ -299,6 +300,12 @@ return 0; } +EXTERN int omp_get_device_num(void) { + int32_t num = omptarget_device_environment.device_num; + PRINT(LD_IO, "call omp_get_device_num() returns %d\n", num); + return num; +} + EXTERN int omp_get_num_devices(void) { PRINT0(LD_IO, "call omp_get_num_devices() is undef on device, returns 0\n"); return 0; diff --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu --- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "common/omptarget.h" -#include "common/device_environment.h" +#include "device_environment.h" //////////////////////////////////////////////////////////////////////////////// // global device environment diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h --- a/openmp/libomptarget/deviceRTLs/interface.h +++ b/openmp/libomptarget/deviceRTLs/interface.h @@ -86,6 +86,7 @@ EXTERN int omp_get_cancellation(void); EXTERN void omp_set_default_device(int deviceId); EXTERN int omp_get_default_device(void); +EXTERN int omp_get_device_num(void); EXTERN int omp_get_num_devices(void); EXTERN int omp_get_num_teams(void); EXTERN int omp_get_team_num(void); diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -104,7 +104,8 @@ set(BUILD_SHARED_LIBS OFF) set(CUDA_SEPARABLE_COMPILATION ON) list(APPEND CUDA_NVCC_FLAGS -I${devicertl_base_directory} - -I${devicertl_nvptx_directory}/src) + -I${devicertl_nvptx_directory}/src + -I${devicertl_base_directory}/../include) cuda_add_library(omptarget-nvptx STATIC ${cuda_src_files} ${omp_data_objects} OPTIONS ${CUDA_ARCH} ${CUDA_DEBUG} ${MAX_SM_DEFINITION}) @@ -140,7 +141,8 @@ # Set flags for LLVM Bitcode compilation. set(bc_flags ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS} -I${devicertl_base_directory} - -I${devicertl_nvptx_directory}/src) + -I${devicertl_nvptx_directory}/src + -I${devicertl_base_directory}/../include) if(${LIBOMPTARGET_NVPTX_DEBUG}) set(bc_flags ${bc_flags} -DOMPTARGET_NVPTX_DEBUG=-1) diff --git a/openmp/libomptarget/deviceRTLs/common/device_environment.h b/openmp/libomptarget/include/device_environment.h rename from openmp/libomptarget/deviceRTLs/common/device_environment.h rename to openmp/libomptarget/include/device_environment.h --- a/openmp/libomptarget/deviceRTLs/common/device_environment.h +++ b/openmp/libomptarget/include/device_environment.h @@ -13,12 +13,9 @@ #ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_ #define _OMPTARGET_DEVICE_ENVIRONMENT_H_ -#include "target_impl.h" - struct omptarget_device_environmentTy { int32_t debug_level; + int32_t device_num; }; -extern DEVICE omptarget_device_environmentTy omptarget_device_environment; - #endif diff --git a/openmp/libomptarget/plugins/CMakeLists.txt b/openmp/libomptarget/plugins/CMakeLists.txt --- a/openmp/libomptarget/plugins/CMakeLists.txt +++ b/openmp/libomptarget/plugins/CMakeLists.txt @@ -67,6 +67,7 @@ add_subdirectory(aarch64) add_subdirectory(cuda) +add_subdirectory(amdgpu) add_subdirectory(ppc64) add_subdirectory(ppc64le) add_subdirectory(ve) diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -527,13 +527,7 @@ pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER; -// TODO: May need to drop the trailing to fields until deviceRTL is updated -struct omptarget_device_environmentTy { - int32_t debug_level; // gets value of envvar LIBOMPTARGET_DEVICE_RTL_DEBUG - // only useful for Debug build of deviceRTLs - int32_t num_devices; // gets number of active offload devices - int32_t device_num; // gets a value 0 to num_devices-1 -}; +#include "device_environment.h" static RTLDeviceInfoTy DeviceInfo; @@ -941,7 +935,6 @@ } omptarget_device_environmentTy host_device_env; - host_device_env.num_devices = DeviceInfo.NumberOfDevices; host_device_env.device_num = device_id; host_device_env.debug_level = 0; #ifdef OMPTARGET_DEBUG diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -81,9 +81,7 @@ /// Device environment data /// Manually sync with the deviceRTL side for now, move to a dedicated header /// file later. -struct omptarget_device_environmentTy { - int32_t debug_level; -}; +#include "device_environment.h" namespace { bool checkResult(CUresult Err, const char *ErrMsg) { @@ -684,6 +682,7 @@ if (const char *EnvStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) DeviceEnv.debug_level = std::stoi(EnvStr); #endif + DeviceEnv.device_num = DeviceId; const char *DeviceEnvName = "omptarget_device_environment"; CUdeviceptr DeviceEnvPtr; diff --git a/openmp/libomptarget/test/offloading/get_device_num.c b/openmp/libomptarget/test/offloading/get_device_num.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/offloading/get_device_num.c @@ -0,0 +1,33 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu +// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda + +#include +#include + +int main() { + + const int num_devices = omp_get_num_devices(); + + if (num_devices > 0) { + for (unsigned i = 0; i < (unsigned)num_devices; i++) { + int num; +#pragma omp target map(from : num) device(i) + num = omp_get_device_num(); + + printf("Device %u returned id %u\n", i, num); + + if (num != i) { + printf("Fail: Device %u returned id %u\n", i, num); + return 1; + } + } + } + + printf("PASS\n"); + return 0; +} + +// CHECK: PASS