diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h --- a/openmp/libomptarget/include/omptargetplugin.h +++ b/openmp/libomptarget/include/omptargetplugin.h @@ -36,6 +36,11 @@ // function to move data from source device to destination device directly. int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId); +// Return an integer other than zero if the plugin can handle images which do +// not contain target regions and global variables (but can contain other +// functions) +int32_t __tgt_rtl_supports_empty_images(); + // Initialize the requires flags for the device. int64_t __tgt_rtl_init_requires(int64_t RequiresFlags); diff --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports --- a/openmp/libomptarget/plugins/exports +++ b/openmp/libomptarget/plugins/exports @@ -21,6 +21,7 @@ __tgt_rtl_synchronize; __tgt_rtl_register_lib; __tgt_rtl_unregister_lib; + __tgt_rtl_supports_empty_images; local: *; }; diff --git a/openmp/libomptarget/plugins/ve/src/rtl.cpp b/openmp/libomptarget/plugins/ve/src/rtl.cpp --- a/openmp/libomptarget/plugins/ve/src/rtl.cpp +++ b/openmp/libomptarget/plugins/ve/src/rtl.cpp @@ -444,3 +444,5 @@ return __tgt_rtl_run_target_team_region(ID, Entry, Args, Offsets, NumArgs, 1, 1, 0); } + +int32_t __tgt_rtl_supports_empty_images() { return 1; } diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -241,6 +241,8 @@ /// Translation table retreived from the binary HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable; std::mutex TrlTblMtx; ///< For Translation Table + /// Host offload entries in order of image registration + std::vector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder; /// Map from ptrs on the host to an entry in the Translation Table HostPtrToTableMapTy HostPtrToTableMap; diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -75,18 +75,21 @@ */ int32_t device_id = Device.DeviceID; int rc = OFFLOAD_SUCCESS; + bool supportsEmptyImages = Device.RTL->supports_empty_images && + Device.RTL->supports_empty_images() > 0; Device.PendingGlobalsMtx.lock(); PM->TrlTblMtx.lock(); - for (HostEntriesBeginToTransTableTy::iterator entry_it = - PM->HostEntriesBeginToTransTable.begin(); - entry_it != PM->HostEntriesBeginToTransTable.end(); ++entry_it) { - TranslationTable *TransTable = &entry_it->second; + for (auto *HostEntriesBegin : PM->HostEntriesBeginRegistrationOrder) { + TranslationTable *TransTable = + &PM->HostEntriesBeginToTransTable[HostEntriesBegin]; if (TransTable->HostTable.EntriesBegin == - TransTable->HostTable.EntriesEnd) { + TransTable->HostTable.EntriesEnd && + !supportsEmptyImages) { // No host entry so no need to proceed continue; } + if (TransTable->TargetsTable[device_id] != 0) { // Library entries have already been processed continue; diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h --- a/openmp/libomptarget/src/rtl.h +++ b/openmp/libomptarget/src/rtl.h @@ -54,6 +54,7 @@ typedef int64_t(init_requires_ty)(int64_t); typedef int64_t(synchronize_ty)(int32_t, __tgt_async_info *); typedef int32_t (*register_lib_ty)(__tgt_bin_desc *); + typedef int32_t(supports_empty_images_ty)(); int32_t Idx = -1; // RTL index, index is the number of devices // of other RTLs that were registered before, @@ -89,6 +90,7 @@ synchronize_ty *synchronize = nullptr; register_lib_ty register_lib = nullptr; register_lib_ty unregister_lib = nullptr; + supports_empty_images_ty *supports_empty_images = nullptr; // Are there images associated with this RTL. bool isUsed = false; diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -173,6 +173,8 @@ dlsym(dynlib_handle, "__tgt_rtl_register_lib"); *((void **)&R.unregister_lib) = dlsym(dynlib_handle, "__tgt_rtl_unregister_lib"); + *((void **)&R.supports_empty_images) = + dlsym(dynlib_handle, "__tgt_rtl_supports_empty_images"); } DP("RTLs loaded!\n"); @@ -334,6 +336,7 @@ // Initialize (if necessary) translation table for this library. PM->TrlTblMtx.lock(); if (!PM->HostEntriesBeginToTransTable.count(desc->HostEntriesBegin)) { + PM->HostEntriesBeginRegistrationOrder.push_back(desc->HostEntriesBegin); TranslationTable &TransTable = (PM->HostEntriesBeginToTransTable)[desc->HostEntriesBegin]; TransTable.HostTable.EntriesBegin = desc->HostEntriesBegin;