Index: openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -1630,31 +1630,15 @@
   Expected<GenericKernelTy *>
   constructKernelEntry(const __tgt_offload_entry &KernelEntry,
                        DeviceImageTy &Image) override {
-    // Create a metadata object for the exec mode global (auto-generated).
-    StaticGlobalTy<llvm::omp::OMPTgtExecModeFlags> ExecModeGlobal(
-        KernelEntry.name, "_exec_mode");
-
-    // Retrieve execution mode for the kernel. This may fail since some kernels
-    // may not have a execution mode.
-    GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
-    if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) {
-      DP("Failed to read execution mode for '%s': %s\n"
-         "Using default GENERIC (1) execution mode\n",
-         KernelEntry.name, toString(std::move(Err)).data());
-      // Consume the error since it is acceptable to fail.
-      consumeError(std::move(Err));
-      // In some cases the execution mode is not included, so use the default.
-      ExecModeGlobal.setValue(llvm::omp::OMP_TGT_EXEC_MODE_GENERIC);
-    }
 
-    // Check that the retrieved execution mode is valid.
-    if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue()))
-      return Plugin::error("Invalid execution mode %d for '%s'",
-                           ExecModeGlobal.getValue(), KernelEntry.name);
+    Expected<OMPTgtExecModeFlags> ExecMode =
+        getExecutionModeForKernel(KernelEntry.name, Image);
+    if (!ExecMode)
+      return ExecMode.takeError();
 
     // Allocate and initialize the AMDGPU kernel.
     AMDGPUKernelTy *AMDKernel = Plugin::get().allocate<AMDGPUKernelTy>();
-    new (AMDKernel) AMDGPUKernelTy(KernelEntry.name, ExecModeGlobal.getValue());
+    new (AMDKernel) AMDGPUKernelTy(KernelEntry.name, ExecMode.get());
 
     return AMDKernel;
   }
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -481,6 +481,10 @@
     return ((const char *)It->first + It->second > (const char *)Buffer);
   }
 
+  /// Return the execution mode used for kernel \p Name.
+  Expected<OMPTgtExecModeFlags> getExecutionModeForKernel(StringRef Name,
+                                                          DeviceImageTy &Image);
+
   /// Environment variables defined by the LLVM OpenMP implementation
   /// regarding the initial number of streams and events.
   UInt32Envar OMPX_InitialNumStreams;
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -16,6 +16,8 @@
 #include "omptarget.h"
 #include "omptargetplugin.h"
 
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/MemoryBuffer.h"
 
@@ -576,6 +578,34 @@
   return Plugin::success();
 }
 
+Expected<OMPTgtExecModeFlags>
+GenericDeviceTy::getExecutionModeForKernel(StringRef Name,
+                                           DeviceImageTy &Image) {
+  // Create a metadata object for the exec mode global (auto-generated).
+  StaticGlobalTy<llvm::omp::OMPTgtExecModeFlags> ExecModeGlobal(Name.data(),
+                                                                "_exec_mode");
+
+  // Retrieve execution mode for the kernel. This may fail since some kernels
+  // may not have an execution mode.
+  GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
+  if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) {
+    // Consume the error since it is acceptable to fail.
+    [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
+    DP("Failed to read execution mode for '%s': %s\n"
+       "Using default SPMD (2) execution mode\n",
+       Name.data(), ErrStr.data());
+
+    return OMP_TGT_EXEC_MODE_SPMD;
+  }
+
+  // Check that the retrieved execution mode is valid.
+  if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue()))
+    return Plugin::error("Invalid execution mode %d for '%s'",
+                         ExecModeGlobal.getValue(), Name.data());
+
+  return ExecModeGlobal.getValue();
+}
+
 Error GenericDeviceTy::registerHostPinnedMemoryBuffer(const void *Buffer,
                                                       size_t Size) {
   std::lock_guard Lock(HostAllocationsMutex);
Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -24,6 +24,7 @@
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
+#include "llvm/Support/Error.h"
 
 namespace llvm {
 namespace omp {
@@ -348,33 +349,14 @@
     DP("Entry point " DPxMOD " maps to %s (" DPxMOD ")\n", DPxPTR(&KernelEntry),
        KernelEntry.name, DPxPTR(Func));
 
-    // Create a metadata object for the exec mode global (auto-generated).
-    StaticGlobalTy<llvm::omp::OMPTgtExecModeFlags> ExecModeGlobal(
-        KernelEntry.name, "_exec_mode");
-
-    // Retrieve execution mode for the kernel. This may fail since some kernels
-    // may not have a execution mode.
-    GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
-    if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) {
-      // In some cases the execution mode is not included, so use the default.
-      ExecModeGlobal.setValue(llvm::omp::OMP_TGT_EXEC_MODE_GENERIC);
-      // Consume the error since it is acceptable to fail.
-      [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
-
-      DP("Failed to read execution mode for '%s': %s\n"
-         "Using default GENERIC (1) execution mode\n",
-         KernelEntry.name, ErrStr.data());
-    }
-
-    // Check that the retrieved execution mode is valid.
-    if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue()))
-      return Plugin::error("Invalid execution mode %d for '%s'",
-                           ExecModeGlobal.getValue(), KernelEntry.name);
+    Expected<OMPTgtExecModeFlags> ExecMode =
+        getExecutionModeForKernel(KernelEntry.name, Image);
+    if (!ExecMode)
+      return ExecMode.takeError();
 
     // Allocate and initialize the CUDA kernel.
     CUDAKernelTy *CUDAKernel = Plugin::get().allocate<CUDAKernelTy>();
-    new (CUDAKernel)
-        CUDAKernelTy(KernelEntry.name, ExecModeGlobal.getValue(), Func);
+    new (CUDAKernel) CUDAKernelTy(KernelEntry.name, ExecMode.get(), Func);
 
     return CUDAKernel;
   }