Index: libomptarget/src/CMakeLists.txt =================================================================== --- libomptarget/src/CMakeLists.txt +++ libomptarget/src/CMakeLists.txt @@ -14,6 +14,7 @@ libomptarget_say("Building offloading runtime library libomptarget.") set(src_files + rtl.cpp omptarget.cpp ) Index: libomptarget/src/device.h =================================================================== --- /dev/null +++ libomptarget/src/device.h @@ -0,0 +1,161 @@ +//===----------- device.h - Target independent OpenMP target RTL ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// +// +// Declarations for managing devices that are handled by RTL plugins. +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPTARGET_DEVICE_H +#define _OMPTARGET_DEVICE_H + +#include +#include +#include +#include +#include + +// Forward declarations. +struct RTLInfoTy; +struct __tgt_bin_desc; +struct __tgt_target_table; + +/// Map between host data and target data. +struct HostDataToTargetTy { + uintptr_t HstPtrBase; // host info. + uintptr_t HstPtrBegin; + uintptr_t HstPtrEnd; // non-inclusive. + + uintptr_t TgtPtrBegin; // target info. 
+ + long RefCount; + + HostDataToTargetTy() + : HstPtrBase(0), HstPtrBegin(0), HstPtrEnd(0), + TgtPtrBegin(0), RefCount(0) {} + HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB) + : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), + TgtPtrBegin(TB), RefCount(1) {} + HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB, + long RF) + : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), + TgtPtrBegin(TB), RefCount(RF) {} +}; + +typedef std::list HostDataToTargetListTy; + +struct LookupResult { + struct { + unsigned IsContained : 1; + unsigned ExtendsBefore : 1; + unsigned ExtendsAfter : 1; + } Flags; + + HostDataToTargetListTy::iterator Entry; + + LookupResult() : Flags({0,0,0}), Entry() {} +}; + +/// Map for shadow pointers +struct ShadowPtrValTy { + void *HstPtrVal; + void *TgtPtrAddr; + void *TgtPtrVal; +}; +typedef std::map ShadowPtrListTy; + +/// +struct PendingCtorDtorListsTy { + std::list PendingCtors; + std::list PendingDtors; +}; +typedef std::map<__tgt_bin_desc *, PendingCtorDtorListsTy> + PendingCtorsDtorsPerLibrary; + +struct DeviceTy { + int32_t DeviceID; + RTLInfoTy *RTL; + int32_t RTLDeviceID; + + bool IsInit; + std::once_flag InitFlag; + bool HasPendingGlobals; + + HostDataToTargetListTy HostDataToTargetMap; + PendingCtorsDtorsPerLibrary PendingCtorsDtors; + + ShadowPtrListTy ShadowPtrMap; + + std::mutex DataMapMtx, PendingGlobalsMtx, ShadowMtx; + + uint64_t loopTripCnt; + + DeviceTy(RTLInfoTy *RTL) + : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), + HasPendingGlobals(false), HostDataToTargetMap(), + PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), + ShadowMtx(), loopTripCnt(0) {} + + // The existence of mutexes makes DeviceTy non-copyable. We need to + // provide a copy constructor and an assignment operator explicitly. 
+ DeviceTy(const DeviceTy &d) + : DeviceID(d.DeviceID), RTL(d.RTL), RTLDeviceID(d.RTLDeviceID), + IsInit(d.IsInit), InitFlag(), HasPendingGlobals(d.HasPendingGlobals), + HostDataToTargetMap(d.HostDataToTargetMap), + PendingCtorsDtors(d.PendingCtorsDtors), ShadowPtrMap(d.ShadowPtrMap), + DataMapMtx(), PendingGlobalsMtx(), + ShadowMtx(), loopTripCnt(d.loopTripCnt) {} + + DeviceTy& operator=(const DeviceTy &d) { + DeviceID = d.DeviceID; + RTL = d.RTL; + RTLDeviceID = d.RTLDeviceID; + IsInit = d.IsInit; + HasPendingGlobals = d.HasPendingGlobals; + HostDataToTargetMap = d.HostDataToTargetMap; + PendingCtorsDtors = d.PendingCtorsDtors; + ShadowPtrMap = d.ShadowPtrMap; + loopTripCnt = d.loopTripCnt; + + return *this; + } + + long getMapEntryRefCnt(void *HstPtrBegin); + LookupResult lookupMapping(void *HstPtrBegin, int64_t Size); + void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, + bool &IsNew, bool IsImplicit, bool UpdateRefCount = true); + void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); + void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, + bool UpdateRefCount); + int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete); + int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); + int disassociatePtr(void *HstPtrBegin); + + // calls to RTL + int32_t initOnce(); + __tgt_target_table *load_binary(void *Img); + + int32_t data_submit(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size); + int32_t data_retrieve(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); + + int32_t run_region(void *TgtEntryPtr, void **TgtVarsPtr, + ptrdiff_t *TgtOffsets, int32_t TgtVarsSize); + int32_t run_team_region(void *TgtEntryPtr, void **TgtVarsPtr, + ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, int32_t NumTeams, + int32_t ThreadLimit, uint64_t LoopTripCount); + +private: + // Call to RTL + void init(); // To be called only via DeviceTy::initOnce() +}; + +/// Map between Device ID (i.e. openmp device id) and its DeviceTy. 
+typedef std::vector DevicesTy; +extern DevicesTy Devices; + +#endif Index: libomptarget/src/omptarget.cpp =================================================================== --- libomptarget/src/omptarget.cpp +++ libomptarget/src/omptarget.cpp @@ -12,412 +12,27 @@ // //===----------------------------------------------------------------------===// -#include +#include + +#include "device.h" +#include "private.h" +#include "rtl.h" + #include #include -#include #include -#include -#include -#include -#include #include #include -// Header file global to this project -#include "omptarget.h" - #ifdef OMPTARGET_DEBUG -static int DebugLevel = 0; - -#define DP(...) \ - do { \ - if (DebugLevel > 0) { \ - DEBUGP("Libomptarget", __VA_ARGS__); \ - } \ - } while (false) -#else // OMPTARGET_DEBUG -#define DP(...) {} +int DebugLevel = 0; #endif // OMPTARGET_DEBUG #define INF_REF_CNT (LONG_MAX>>1) // leave room for additions/subtractions #define CONSIDERED_INF(x) (x > (INF_REF_CNT>>1)) -// List of all plugins that can support offloading. -static const char *RTLNames[] = { - /* PowerPC target */ "libomptarget.rtl.ppc64.so", - /* x86_64 target */ "libomptarget.rtl.x86_64.so", - /* CUDA target */ "libomptarget.rtl.cuda.so", - /* AArch64 target */ "libomptarget.rtl.aarch64.so"}; - -// forward declarations -struct RTLInfoTy; -static int target(int64_t device_id, void *host_ptr, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t team_num, int32_t thread_limit, int IsTeamConstruct); - -/// Map between host data and target data. -struct HostDataToTargetTy { - uintptr_t HstPtrBase; // host info. - uintptr_t HstPtrBegin; - uintptr_t HstPtrEnd; // non-inclusive. - - uintptr_t TgtPtrBegin; // target info. 
- - long RefCount; - - HostDataToTargetTy() - : HstPtrBase(0), HstPtrBegin(0), HstPtrEnd(0), - TgtPtrBegin(0), RefCount(0) {} - HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB) - : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), - TgtPtrBegin(TB), RefCount(1) {} - HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB, - long RF) - : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), - TgtPtrBegin(TB), RefCount(RF) {} -}; - -typedef std::list HostDataToTargetListTy; - -struct LookupResult { - struct { - unsigned IsContained : 1; - unsigned ExtendsBefore : 1; - unsigned ExtendsAfter : 1; - } Flags; - - HostDataToTargetListTy::iterator Entry; - - LookupResult() : Flags({0,0,0}), Entry() {} -}; - -/// Map for shadow pointers -struct ShadowPtrValTy { - void *HstPtrVal; - void *TgtPtrAddr; - void *TgtPtrVal; -}; -typedef std::map ShadowPtrListTy; - -/// -struct PendingCtorDtorListsTy { - std::list PendingCtors; - std::list PendingDtors; -}; -typedef std::map<__tgt_bin_desc *, PendingCtorDtorListsTy> - PendingCtorsDtorsPerLibrary; - -struct DeviceTy { - int32_t DeviceID; - RTLInfoTy *RTL; - int32_t RTLDeviceID; - - bool IsInit; - std::once_flag InitFlag; - bool HasPendingGlobals; - - HostDataToTargetListTy HostDataToTargetMap; - PendingCtorsDtorsPerLibrary PendingCtorsDtors; - - ShadowPtrListTy ShadowPtrMap; - - std::mutex DataMapMtx, PendingGlobalsMtx, ShadowMtx; - - uint64_t loopTripCnt; - - DeviceTy(RTLInfoTy *RTL) - : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), - HasPendingGlobals(false), HostDataToTargetMap(), - PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), - ShadowMtx(), loopTripCnt(0) {} - - // The existence of mutexes makes DeviceTy non-copyable. We need to - // provide a copy constructor and an assignment operator explicitly. 
- DeviceTy(const DeviceTy &d) - : DeviceID(d.DeviceID), RTL(d.RTL), RTLDeviceID(d.RTLDeviceID), - IsInit(d.IsInit), InitFlag(), HasPendingGlobals(d.HasPendingGlobals), - HostDataToTargetMap(d.HostDataToTargetMap), - PendingCtorsDtors(d.PendingCtorsDtors), ShadowPtrMap(d.ShadowPtrMap), - DataMapMtx(), PendingGlobalsMtx(), - ShadowMtx(), loopTripCnt(d.loopTripCnt) {} - - DeviceTy& operator=(const DeviceTy &d) { - DeviceID = d.DeviceID; - RTL = d.RTL; - RTLDeviceID = d.RTLDeviceID; - IsInit = d.IsInit; - HasPendingGlobals = d.HasPendingGlobals; - HostDataToTargetMap = d.HostDataToTargetMap; - PendingCtorsDtors = d.PendingCtorsDtors; - ShadowPtrMap = d.ShadowPtrMap; - loopTripCnt = d.loopTripCnt; - - return *this; - } - - long getMapEntryRefCnt(void *HstPtrBegin); - LookupResult lookupMapping(void *HstPtrBegin, int64_t Size); - void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, - bool &IsNew, bool IsImplicit, bool UpdateRefCount = true); - void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); - void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, - bool UpdateRefCount); - int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete); - int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); - int disassociatePtr(void *HstPtrBegin); - - // calls to RTL - int32_t initOnce(); - __tgt_target_table *load_binary(void *Img); - - int32_t data_submit(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size); - int32_t data_retrieve(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); - - int32_t run_region(void *TgtEntryPtr, void **TgtVarsPtr, - ptrdiff_t *TgtOffsets, int32_t TgtVarsSize); - int32_t run_team_region(void *TgtEntryPtr, void **TgtVarsPtr, - ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, int32_t NumTeams, - int32_t ThreadLimit, uint64_t LoopTripCount); - -private: - // Call to RTL - void init(); // To be called only via DeviceTy::initOnce() -}; - /// Map between Device ID (i.e. openmp device id) and its DeviceTy. 
-typedef std::vector DevicesTy; -static DevicesTy Devices; - -struct RTLInfoTy { - typedef int32_t(is_valid_binary_ty)(void *); - typedef int32_t(number_of_devices_ty)(); - typedef int32_t(init_device_ty)(int32_t); - typedef __tgt_target_table *(load_binary_ty)(int32_t, void *); - typedef void *(data_alloc_ty)(int32_t, int64_t, void *); - typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t); - typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t); - typedef int32_t(data_delete_ty)(int32_t, void *); - typedef int32_t(run_region_ty)(int32_t, void *, void **, ptrdiff_t *, - int32_t); - typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *, - int32_t, int32_t, int32_t, uint64_t); - - int32_t Idx; // RTL index, index is the number of devices - // of other RTLs that were registered before, - // i.e. the OpenMP index of the first device - // to be registered with this RTL. - int32_t NumberOfDevices; // Number of devices this RTL deals with. - std::vector Devices; // one per device (NumberOfDevices in total). - - void *LibraryHandler; - -#ifdef OMPTARGET_DEBUG - std::string RTLName; -#endif - - // Functions implemented in the RTL. - is_valid_binary_ty *is_valid_binary; - number_of_devices_ty *number_of_devices; - init_device_ty *init_device; - load_binary_ty *load_binary; - data_alloc_ty *data_alloc; - data_submit_ty *data_submit; - data_retrieve_ty *data_retrieve; - data_delete_ty *data_delete; - run_region_ty *run_region; - run_team_region_ty *run_team_region; - - // Are there images associated with this RTL. - bool isUsed; - - // Mutex for thread-safety when calling RTL interface functions. - // It is easier to enforce thread-safety at the libomptarget level, - // so that developers of new RTLs do not have to worry about it. - std::mutex Mtx; - - // The existence of the mutex above makes RTLInfoTy non-copyable. - // We need to provide a copy constructor explicitly. 
- RTLInfoTy() - : Idx(-1), NumberOfDevices(-1), Devices(), LibraryHandler(0), -#ifdef OMPTARGET_DEBUG - RTLName(), -#endif - is_valid_binary(0), number_of_devices(0), init_device(0), - load_binary(0), data_alloc(0), data_submit(0), data_retrieve(0), - data_delete(0), run_region(0), run_team_region(0), isUsed(false), - Mtx() {} - - RTLInfoTy(const RTLInfoTy &r) : Mtx() { - Idx = r.Idx; - NumberOfDevices = r.NumberOfDevices; - Devices = r.Devices; - LibraryHandler = r.LibraryHandler; -#ifdef OMPTARGET_DEBUG - RTLName = r.RTLName; -#endif - is_valid_binary = r.is_valid_binary; - number_of_devices = r.number_of_devices; - init_device = r.init_device; - load_binary = r.load_binary; - data_alloc = r.data_alloc; - data_submit = r.data_submit; - data_retrieve = r.data_retrieve; - data_delete = r.data_delete; - run_region = r.run_region; - run_team_region = r.run_team_region; - isUsed = r.isUsed; - } -}; - -/// RTLs identified in the system. -class RTLsTy { -private: - // Mutex-like object to guarantee thread-safety and unique initialization - // (i.e. the library attempts to load the RTLs (plugins) only once). - std::once_flag initFlag; - void LoadRTLs(); // not thread-safe - -public: - // List of the detected runtime libraries. - std::list AllRTLs; - - // Array of pointers to the detected runtime libraries that have compatible - // binaries. - std::vector UsedRTLs; - - explicit RTLsTy() {} - - // Load all the runtime libraries (plugins) if not done before. 
- void LoadRTLsOnce(); -}; - -void RTLsTy::LoadRTLs() { -#ifdef OMPTARGET_DEBUG - if (char *envStr = getenv("LIBOMPTARGET_DEBUG")) { - DebugLevel = std::stoi(envStr); - } -#endif // OMPTARGET_DEBUG - - // Parse environment variable OMP_TARGET_OFFLOAD (if set) - char *envStr = getenv("OMP_TARGET_OFFLOAD"); - if (envStr && !strcmp(envStr, "DISABLED")) { - DP("Target offloading disabled by environment\n"); - return; - } - - DP("Loading RTLs...\n"); - - // Attempt to open all the plugins and, if they exist, check if the interface - // is correct and if they are supporting any devices. - for (auto *Name : RTLNames) { - DP("Loading library '%s'...\n", Name); - void *dynlib_handle = dlopen(Name, RTLD_NOW); - - if (!dynlib_handle) { - // Library does not exist or cannot be found. - DP("Unable to load library '%s': %s!\n", Name, dlerror()); - continue; - } - - DP("Successfully loaded library '%s'!\n", Name); - - // Retrieve the RTL information from the runtime library. - RTLInfoTy R; - - R.LibraryHandler = dynlib_handle; - R.isUsed = false; - -#ifdef OMPTARGET_DEBUG - R.RTLName = Name; -#endif - - if (!(*((void**) &R.is_valid_binary) = dlsym( - dynlib_handle, "__tgt_rtl_is_valid_binary"))) - continue; - if (!(*((void**) &R.number_of_devices) = dlsym( - dynlib_handle, "__tgt_rtl_number_of_devices"))) - continue; - if (!(*((void**) &R.init_device) = dlsym( - dynlib_handle, "__tgt_rtl_init_device"))) - continue; - if (!(*((void**) &R.load_binary) = dlsym( - dynlib_handle, "__tgt_rtl_load_binary"))) - continue; - if (!(*((void**) &R.data_alloc) = dlsym( - dynlib_handle, "__tgt_rtl_data_alloc"))) - continue; - if (!(*((void**) &R.data_submit) = dlsym( - dynlib_handle, "__tgt_rtl_data_submit"))) - continue; - if (!(*((void**) &R.data_retrieve) = dlsym( - dynlib_handle, "__tgt_rtl_data_retrieve"))) - continue; - if (!(*((void**) &R.data_delete) = dlsym( - dynlib_handle, "__tgt_rtl_data_delete"))) - continue; - if (!(*((void**) &R.run_region) = dlsym( - dynlib_handle, 
"__tgt_rtl_run_target_region"))) - continue; - if (!(*((void**) &R.run_team_region) = dlsym( - dynlib_handle, "__tgt_rtl_run_target_team_region"))) - continue; - - // No devices are supported by this RTL? - if (!(R.NumberOfDevices = R.number_of_devices())) { - DP("No devices supported in this RTL\n"); - continue; - } - - DP("Registering RTL %s supporting %d devices!\n", - R.RTLName.c_str(), R.NumberOfDevices); - - // The RTL is valid! Will save the information in the RTLs list. - AllRTLs.push_back(R); - } - - DP("RTLs loaded!\n"); - - return; -} - -void RTLsTy::LoadRTLsOnce() { - // RTL.LoadRTLs() is called only once in a thread-safe fashion. - std::call_once(initFlag, &RTLsTy::LoadRTLs, this); -} - -static RTLsTy RTLs; -static std::mutex RTLsMtx; - -/// Map between the host entry begin and the translation table. Each -/// registered library gets one TranslationTable. Use the map from -/// __tgt_offload_entry so that we may quickly determine whether we -/// are trying to (re)register an existing lib or really have a new one. -struct TranslationTable { - __tgt_target_table HostTable; - - // Image assigned to a given device. - std::vector<__tgt_device_image *> TargetsImages; // One image per device ID. - - // Table of entry points or NULL if it was not already computed. - std::vector<__tgt_target_table *> TargetsTable; // One table per device ID. -}; -typedef std::map<__tgt_offload_entry *, TranslationTable> - HostEntriesBeginToTransTableTy; -static HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable; -static std::mutex TrlTblMtx; - -/// Map between the host ptr and a table index -struct TableMap { - TranslationTable *Table; // table associated with the host ptr. - uint32_t Index; // index in which the host ptr translated entry is found. 
- TableMap() : Table(0), Index(0) {} - TableMap(TranslationTable *table, uint32_t index) - : Table(table), Index(index) {} -}; -typedef std::map HostPtrToTableMapTy; -static HostPtrToTableMapTy HostPtrToTableMap; -static std::mutex TblMapMtx; +DevicesTy Devices; /// Check whether a device has an associated RTL and initialize it if it's not /// already initialized. @@ -1029,248 +644,15 @@ } //////////////////////////////////////////////////////////////////////////////// -// Functionality for registering libs - -static void RegisterImageIntoTranslationTable(TranslationTable &TT, - RTLInfoTy &RTL, __tgt_device_image *image) { - - // same size, as when we increase one, we also increase the other. - assert(TT.TargetsTable.size() == TT.TargetsImages.size() && - "We should have as many images as we have tables!"); - - // Resize the Targets Table and Images to accommodate the new targets if - // required - unsigned TargetsTableMinimumSize = RTL.Idx + RTL.NumberOfDevices; - - if (TT.TargetsTable.size() < TargetsTableMinimumSize) { - TT.TargetsImages.resize(TargetsTableMinimumSize, 0); - TT.TargetsTable.resize(TargetsTableMinimumSize, 0); - } - - // Register the image in all devices for this target type. - for (int32_t i = 0; i < RTL.NumberOfDevices; ++i) { - // If we are changing the image we are also invalidating the target table. - if (TT.TargetsImages[RTL.Idx + i] != image) { - TT.TargetsImages[RTL.Idx + i] = image; - TT.TargetsTable[RTL.Idx + i] = 0; // lazy initialization of target table. 
- } - } -} - -//////////////////////////////////////////////////////////////////////////////// -// Functionality for registering Ctors/Dtors - -static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc, - __tgt_device_image *img, RTLInfoTy *RTL) { - - for (int32_t i = 0; i < RTL->NumberOfDevices; ++i) { - DeviceTy &Device = Devices[RTL->Idx + i]; - Device.PendingGlobalsMtx.lock(); - Device.HasPendingGlobals = true; - for (__tgt_offload_entry *entry = img->EntriesBegin; - entry != img->EntriesEnd; ++entry) { - if (entry->flags & OMP_DECLARE_TARGET_CTOR) { - DP("Adding ctor " DPxMOD " to the pending list.\n", - DPxPTR(entry->addr)); - Device.PendingCtorsDtors[desc].PendingCtors.push_back(entry->addr); - } else if (entry->flags & OMP_DECLARE_TARGET_DTOR) { - // Dtors are pushed in reverse order so they are executed from end - // to beginning when unregistering the library! - DP("Adding dtor " DPxMOD " to the pending list.\n", - DPxPTR(entry->addr)); - Device.PendingCtorsDtors[desc].PendingDtors.push_front(entry->addr); - } - - if (entry->flags & OMP_DECLARE_TARGET_LINK) { - DP("The \"link\" attribute is not yet supported!\n"); - } - } - Device.PendingGlobalsMtx.unlock(); - } -} - -//////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { - - // Attempt to load all plugins available in the system. - RTLs.LoadRTLsOnce(); - - RTLsMtx.lock(); - // Register the images with the RTLs that understand them, if any. - for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { - // Obtain the image. - __tgt_device_image *img = &desc->DeviceImages[i]; - - RTLInfoTy *FoundRTL = NULL; - - // Scan the RTLs that have associated images until we find one that supports - // the current image. 
- for (auto &R : RTLs.AllRTLs) { - if (!R.is_valid_binary(img)) { - DP("Image " DPxMOD " is NOT compatible with RTL %s!\n", - DPxPTR(img->ImageStart), R.RTLName.c_str()); - continue; - } - - DP("Image " DPxMOD " is compatible with RTL %s!\n", - DPxPTR(img->ImageStart), R.RTLName.c_str()); - - // If this RTL is not already in use, initialize it. - if (!R.isUsed) { - // Initialize the device information for the RTL we are about to use. - DeviceTy device(&R); - - size_t start = Devices.size(); - Devices.resize(start + R.NumberOfDevices, device); - for (int32_t device_id = 0; device_id < R.NumberOfDevices; - device_id++) { - // global device ID - Devices[start + device_id].DeviceID = start + device_id; - // RTL local device ID - Devices[start + device_id].RTLDeviceID = device_id; - - // Save pointer to device in RTL in case we want to unregister the RTL - R.Devices.push_back(&Devices[start + device_id]); - } - - // Initialize the index of this RTL and save it in the used RTLs. - R.Idx = (RTLs.UsedRTLs.empty()) - ? 0 - : RTLs.UsedRTLs.back()->Idx + - RTLs.UsedRTLs.back()->NumberOfDevices; - assert((size_t) R.Idx == start && - "RTL index should equal the number of devices used so far."); - R.isUsed = true; - RTLs.UsedRTLs.push_back(&R); - - DP("RTL " DPxMOD " has index %d!\n", DPxPTR(R.LibraryHandler), R.Idx); - } - - // Initialize (if necessary) translation table for this library. - TrlTblMtx.lock(); - if(!HostEntriesBeginToTransTable.count(desc->HostEntriesBegin)){ - TranslationTable &tt = - HostEntriesBeginToTransTable[desc->HostEntriesBegin]; - tt.HostTable.EntriesBegin = desc->HostEntriesBegin; - tt.HostTable.EntriesEnd = desc->HostEntriesEnd; - } - - // Retrieve translation table for this library. 
- TranslationTable &TransTable = - HostEntriesBeginToTransTable[desc->HostEntriesBegin]; - - DP("Registering image " DPxMOD " with RTL %s!\n", - DPxPTR(img->ImageStart), R.RTLName.c_str()); - RegisterImageIntoTranslationTable(TransTable, R, img); - TrlTblMtx.unlock(); - FoundRTL = &R; - - // Load ctors/dtors for static objects - RegisterGlobalCtorsDtorsForImage(desc, img, FoundRTL); - - // if an RTL was found we are done - proceed to register the next image - break; - } - - if (!FoundRTL) { - DP("No RTL found for image " DPxMOD "!\n", DPxPTR(img->ImageStart)); - } - } - RTLsMtx.unlock(); - - - DP("Done registering entries!\n"); + RTLs.RegisterLib(desc); } //////////////////////////////////////////////////////////////////////////////// /// unloads a target shared library EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { - DP("Unloading target library!\n"); - - RTLsMtx.lock(); - // Find which RTL understands each image, if any. - for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { - // Obtain the image. - __tgt_device_image *img = &desc->DeviceImages[i]; - - RTLInfoTy *FoundRTL = NULL; - - // Scan the RTLs that have associated images until we find one that supports - // the current image. We only need to scan RTLs that are already being used. - for (auto *R : RTLs.UsedRTLs) { - - assert(R->isUsed && "Expecting used RTLs."); - - if (!R->is_valid_binary(img)) { - DP("Image " DPxMOD " is NOT compatible with RTL " DPxMOD "!\n", - DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler)); - continue; - } - - DP("Image " DPxMOD " is compatible with RTL " DPxMOD "!\n", - DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler)); - - FoundRTL = R; - - // Execute dtors for static objects if the device has been used, i.e. - // if its PendingCtors list has been emptied. 
- for (int32_t i = 0; i < FoundRTL->NumberOfDevices; ++i) { - DeviceTy &Device = Devices[FoundRTL->Idx + i]; - Device.PendingGlobalsMtx.lock(); - if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) { - for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) { - int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, 1, - 1, true /*team*/); - if (rc != OFFLOAD_SUCCESS) { - DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor)); - } - } - // Remove this library's entry from PendingCtorsDtors - Device.PendingCtorsDtors.erase(desc); - } - Device.PendingGlobalsMtx.unlock(); - } - - DP("Unregistered image " DPxMOD " from RTL " DPxMOD "!\n", - DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler)); - - break; - } - - // if no RTL was found proceed to unregister the next image - if (!FoundRTL){ - DP("No RTLs in use support the image " DPxMOD "!\n", - DPxPTR(img->ImageStart)); - } - } - RTLsMtx.unlock(); - DP("Done unregistering images!\n"); - - // Remove entries from HostPtrToTableMap - TblMapMtx.lock(); - for (__tgt_offload_entry *cur = desc->HostEntriesBegin; - cur < desc->HostEntriesEnd; ++cur) { - HostPtrToTableMap.erase(cur->addr); - } - - // Remove translation table for this descriptor. - auto tt = HostEntriesBeginToTransTable.find(desc->HostEntriesBegin); - if (tt != HostEntriesBeginToTransTable.end()) { - DP("Removing translation table for descriptor " DPxMOD "\n", - DPxPTR(desc->HostEntriesBegin)); - HostEntriesBeginToTransTable.erase(tt); - } else { - DP("Translation table for descriptor " DPxMOD " cannot be found, probably " - "it has been already removed.\n", DPxPTR(desc->HostEntriesBegin)); - } - - TblMapMtx.unlock(); - - // TODO: Remove RTL and the devices it manages if it's not used anymore? - // TODO: Write some RTL->unload_image(...) function? 
- - DP("Done unregistering library!\n"); + RTLs.UnregisterLib(desc); } /// Map global data and execute pending ctors @@ -2060,7 +1442,7 @@ /// performs the same action as data_update and data_end above. This function /// returns 0 if it was able to transfer the execution to a target and an /// integer different from zero otherwise. -static int target(int64_t device_id, void *host_ptr, int32_t arg_num, +int target(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, int32_t team_num, int32_t thread_limit, int IsTeamConstruct) { DeviceTy &Device = Devices[device_id]; Index: libomptarget/src/private.h =================================================================== --- /dev/null +++ libomptarget/src/private.h @@ -0,0 +1,38 @@ +//===---------- private.h - Target independent OpenMP target RTL ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// +// +// Private function declarations and helper macros for debugging output. +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPTARGET_PRIVATE_H +#define _OMPTARGET_PRIVATE_H + +#include + +#include + +extern int target(int64_t device_id, void *host_ptr, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + int32_t team_num, int32_t thread_limit, int IsTeamConstruct); + +#ifdef OMPTARGET_DEBUG +extern int DebugLevel; + +#define DP(...) \ + do { \ + if (DebugLevel > 0) { \ + DEBUGP("Libomptarget", __VA_ARGS__); \ + } \ + } while (false) +#else // OMPTARGET_DEBUG +#define DP(...) 
{}
+#endif // OMPTARGET_DEBUG
+
+#endif
Index: libomptarget/src/rtl.h
===================================================================
--- /dev/null
+++ libomptarget/src/rtl.h
@@ -0,0 +1,167 @@
+//===------------ rtl.h - Target independent OpenMP target RTL ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Declarations for handling RTL plugins.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OMPTARGET_RTL_H
+#define _OMPTARGET_RTL_H
+
+#include <list>
+#include <map>
+#include <mutex>
+#include <string>
+#include <vector>
+
+// Forward declarations.
+struct DeviceTy;
+struct __tgt_bin_desc;
+
+/// Description of one RTL plugin: its library handle, the entry points
+/// resolved from it and the devices it manages.
+struct RTLInfoTy {
+  typedef int32_t(is_valid_binary_ty)(void *);
+  typedef int32_t(number_of_devices_ty)();
+  typedef int32_t(init_device_ty)(int32_t);
+  typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
+  typedef void *(data_alloc_ty)(int32_t, int64_t, void *);
+  typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t);
+  typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t);
+  typedef int32_t(data_delete_ty)(int32_t, void *);
+  typedef int32_t(run_region_ty)(int32_t, void *, void **, ptrdiff_t *,
+                                 int32_t);
+  typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *,
+                                      int32_t, int32_t, int32_t, uint64_t);
+
+  int32_t Idx;                     // RTL index, index is the number of devices
+                                   // of other RTLs that were registered before,
+                                   // i.e. the OpenMP index of the first device
+                                   // to be registered with this RTL.
+  int32_t NumberOfDevices;         // Number of devices this RTL deals with.
+  std::vector<DeviceTy *> Devices; // one per device (NumberOfDevices in total).
+
+  void *LibraryHandler;
+
+#ifdef OMPTARGET_DEBUG
+  std::string RTLName;
+#endif
+
+  // Functions implemented in the RTL.
+  is_valid_binary_ty *is_valid_binary;
+  number_of_devices_ty *number_of_devices;
+  init_device_ty *init_device;
+  load_binary_ty *load_binary;
+  data_alloc_ty *data_alloc;
+  data_submit_ty *data_submit;
+  data_retrieve_ty *data_retrieve;
+  data_delete_ty *data_delete;
+  run_region_ty *run_region;
+  run_team_region_ty *run_team_region;
+
+  // Are there images associated with this RTL.
+  bool isUsed;
+
+  // Mutex for thread-safety when calling RTL interface functions.
+  // It is easier to enforce thread-safety at the libomptarget level,
+  // so that developers of new RTLs do not have to worry about it.
+  std::mutex Mtx;
+
+  // The existence of the mutex above makes RTLInfoTy non-copyable by default.
+  // The explicit copy constructor below gives every copy its own fresh mutex.
+  RTLInfoTy()
+      : Idx(-1), NumberOfDevices(-1), Devices(), LibraryHandler(0),
+#ifdef OMPTARGET_DEBUG
+        RTLName(),
+#endif
+        is_valid_binary(0), number_of_devices(0), init_device(0),
+        load_binary(0), data_alloc(0), data_submit(0), data_retrieve(0),
+        data_delete(0), run_region(0), run_team_region(0), isUsed(false),
+        Mtx() {}
+
+  RTLInfoTy(const RTLInfoTy &r) : Mtx() {
+    Idx = r.Idx;
+    NumberOfDevices = r.NumberOfDevices;
+    Devices = r.Devices;
+    LibraryHandler = r.LibraryHandler;
+#ifdef OMPTARGET_DEBUG
+    RTLName = r.RTLName;
+#endif
+    is_valid_binary = r.is_valid_binary;
+    number_of_devices = r.number_of_devices;
+    init_device = r.init_device;
+    load_binary = r.load_binary;
+    data_alloc = r.data_alloc;
+    data_submit = r.data_submit;
+    data_retrieve = r.data_retrieve;
+    data_delete = r.data_delete;
+    run_region = r.run_region;
+    run_team_region = r.run_team_region;
+    isUsed = r.isUsed;
+  }
+};
+
+/// RTLs identified in the system.
+class RTLsTy {
+private:
+  // Mutex-like object to guarantee thread-safety and unique initialization
+  // (i.e. the library attempts to load the RTLs (plugins) only once).
+  std::once_flag initFlag;
+  void LoadRTLs(); // not thread-safe
+
+public:
+  // List of the detected runtime libraries. UsedRTLs points into this list.
+  std::list<RTLInfoTy> AllRTLs;
+
+  // Array of pointers to the detected runtime libraries that have compatible
+  // binaries.
+  std::vector<RTLInfoTy *> UsedRTLs;
+
+  explicit RTLsTy() {}
+
+  // Register a shared library with all (compatible) RTLs.
+  void RegisterLib(__tgt_bin_desc *desc);
+
+  // Unregister a shared library from all RTLs.
+  void UnregisterLib(__tgt_bin_desc *desc);
+};
+extern RTLsTy RTLs;
+extern std::mutex RTLsMtx;
+
+/// Map between the host entry begin and the translation table. Each
+/// registered library gets one TranslationTable. Use the map from
+/// __tgt_offload_entry so that we may quickly determine whether we
+/// are trying to (re)register an existing lib or really have a new one.
+struct TranslationTable {
+  __tgt_target_table HostTable;
+
+  // Image assigned to a given device.
+  std::vector<__tgt_device_image *> TargetsImages; // One image per device ID.
+
+  // Table of entry points or NULL if it was not already computed.
+  std::vector<__tgt_target_table *> TargetsTable; // One table per device ID.
+};
+typedef std::map<__tgt_offload_entry *, TranslationTable>
+    HostEntriesBeginToTransTableTy;
+extern HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
+extern std::mutex TrlTblMtx;
+
+/// Map between the host ptr and a table index.
+struct TableMap {
+  TranslationTable *Table; // table associated with the host ptr.
+  uint32_t Index; // index in which the host ptr translated entry is found.
+  TableMap() : Table(0), Index(0) {}
+  TableMap(TranslationTable *table, uint32_t index)
+      : Table(table), Index(index) {}
+};
+typedef std::map<void *, TableMap> HostPtrToTableMapTy;
+extern HostPtrToTableMapTy HostPtrToTableMap;
+extern std::mutex TblMapMtx;
+
+#endif
Index: libomptarget/src/rtl.cpp
===================================================================
--- /dev/null
+++ libomptarget/src/rtl.cpp
@@ -0,0 +1,368 @@
+//===----------- rtl.cpp - Target independent OpenMP target RTL -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Functionality for handling RTL plugins.
+//
+//===----------------------------------------------------------------------===//
+
+#include "device.h"
+#include "private.h"
+#include "rtl.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <dlfcn.h>
+#include <mutex>
+#include <string>
+
+// List of all plugins that can support offloading.
+static const char *RTLNames[] = {
+    /* PowerPC target */ "libomptarget.rtl.ppc64.so",
+    /* x86_64 target  */ "libomptarget.rtl.x86_64.so",
+    /* CUDA target    */ "libomptarget.rtl.cuda.so",
+    /* AArch64 target */ "libomptarget.rtl.aarch64.so"};
+
+RTLsTy RTLs;
+std::mutex RTLsMtx; // Held by RegisterLib/UnregisterLib while scanning RTLs.
+
+HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
+std::mutex TrlTblMtx; // Held by RegisterLib while updating the table above.
+
+HostPtrToTableMapTy HostPtrToTableMap;
+std::mutex TblMapMtx; // Held by UnregisterLib while erasing entries.
+
+/// Opens every plugin listed in RTLNames and appends the usable ones to
+/// AllRTLs. Invoked through std::call_once from RegisterLib.
+void RTLsTy::LoadRTLs() {
+#ifdef OMPTARGET_DEBUG
+  if (char *envStr = getenv("LIBOMPTARGET_DEBUG")) {
+    // strtol, unlike std::stoi, does not throw on malformed input.
+    DebugLevel = static_cast<int>(strtol(envStr, nullptr, 10));
+  }
+#endif // OMPTARGET_DEBUG
+
+  // Parse environment variable OMP_TARGET_OFFLOAD (if set)
+  char *envStr = getenv("OMP_TARGET_OFFLOAD");
+  if (envStr && !strcmp(envStr, "DISABLED")) {
+    DP("Target offloading disabled by environment\n");
+    return;
+  }
+
+  DP("Loading RTLs...\n");
+
+  // Attempt to open all the plugins and, if they exist, check if the interface
+  // is correct and if they are supporting any devices.
+  for (auto *Name : RTLNames) {
+    DP("Loading library '%s'...\n", Name);
+    void *dynlib_handle = dlopen(Name, RTLD_NOW);
+
+    if (!dynlib_handle) {
+      // Library does not exist or cannot be found.
+      DP("Unable to load library '%s': %s!\n", Name, dlerror());
+      continue;
+    }
+
+    DP("Successfully loaded library '%s'!\n", Name);
+
+    // Retrieve the RTL information from the runtime library.
+    RTLInfoTy R;
+
+    R.LibraryHandler = dynlib_handle;
+    R.isUsed = false;
+
+#ifdef OMPTARGET_DEBUG
+    R.RTLName = Name;
+#endif
+
+    // Resolves one mandatory entry point into Slot; false if it is missing.
+    // Chained with && below, so lookups stop at the first missing symbol.
+    auto Bind = [dynlib_handle](void **Slot, const char *Symbol) {
+      return (*Slot = dlsym(dynlib_handle, Symbol)) != nullptr;
+    };
+    const bool HasAllEntryPoints =
+        Bind((void **)&R.is_valid_binary, "__tgt_rtl_is_valid_binary") &&
+        Bind((void **)&R.number_of_devices, "__tgt_rtl_number_of_devices") &&
+        Bind((void **)&R.init_device, "__tgt_rtl_init_device") &&
+        Bind((void **)&R.load_binary, "__tgt_rtl_load_binary") &&
+        Bind((void **)&R.data_alloc, "__tgt_rtl_data_alloc") &&
+        Bind((void **)&R.data_submit, "__tgt_rtl_data_submit") &&
+        Bind((void **)&R.data_retrieve, "__tgt_rtl_data_retrieve") &&
+        Bind((void **)&R.data_delete, "__tgt_rtl_data_delete") &&
+        Bind((void **)&R.run_region, "__tgt_rtl_run_target_region") &&
+        Bind((void **)&R.run_team_region, "__tgt_rtl_run_target_team_region");
+
+    if (!HasAllEntryPoints) {
+      // Not a usable plugin: release the handle instead of leaking it.
+      dlclose(dynlib_handle);
+      continue;
+    }
+
+    // No devices are supported by this RTL? A non-positive count leaves
+    // nothing to register, so the plugin is unloaded again.
+    R.NumberOfDevices = R.number_of_devices();
+    if (R.NumberOfDevices <= 0) {
+      DP("No devices supported in this RTL\n");
+      dlclose(dynlib_handle);
+      continue;
+    }
+
+    DP("Registering RTL %s supporting %d devices!\n",
+        R.RTLName.c_str(), R.NumberOfDevices);
+
+    // The RTL is valid! Will save the information in the RTLs list.
+    AllRTLs.push_back(R);
+  }
+
+  DP("RTLs loaded!\n");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Functionality for registering libs
+
+/// Records image as the image of every device managed by RTL in TT, growing
+/// TT's per-device vectors first when they are too short.
+static void RegisterImageIntoTranslationTable(TranslationTable &TT,
+    RTLInfoTy &RTL, __tgt_device_image *image) {
+  // Both vectors have the same size: when one grows, so does the other.
+  assert(TT.TargetsTable.size() == TT.TargetsImages.size() &&
+         "We should have as many images as we have tables!");
+
+  // Resize the Targets Table and Images to accommodate the new targets if
+  // required
+  unsigned TargetsTableMinimumSize = RTL.Idx + RTL.NumberOfDevices;
+
+  if (TT.TargetsTable.size() < TargetsTableMinimumSize) {
+    TT.TargetsImages.resize(TargetsTableMinimumSize, 0);
+    TT.TargetsTable.resize(TargetsTableMinimumSize, 0);
+  }
+
+  // Register the image in all devices for this target type.
+  for (int32_t i = 0; i < RTL.NumberOfDevices; ++i) {
+    // If we are changing the image we are also invalidating the target table.
+    if (TT.TargetsImages[RTL.Idx + i] != image) {
+      TT.TargetsImages[RTL.Idx + i] = image;
+      TT.TargetsTable[RTL.Idx + i] = 0; // lazy initialization of target table.
+    }
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Functionality for registering Ctors/Dtors
+
+/// Queues the ctor and dtor entries of img, keyed by desc, on every device
+/// managed by RTL and flags those devices as having pending globals.
+static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc,
+    __tgt_device_image *img, RTLInfoTy *RTL) {
+  for (int32_t i = 0; i < RTL->NumberOfDevices; ++i) {
+    DeviceTy &Device = Devices[RTL->Idx + i];
+    Device.PendingGlobalsMtx.lock();
+    Device.HasPendingGlobals = true;
+    for (__tgt_offload_entry *entry = img->EntriesBegin;
+        entry != img->EntriesEnd; ++entry) {
+      if (entry->flags & OMP_DECLARE_TARGET_CTOR) {
+        DP("Adding ctor " DPxMOD " to the pending list.\n",
+            DPxPTR(entry->addr));
+        Device.PendingCtorsDtors[desc].PendingCtors.push_back(entry->addr);
+      } else if (entry->flags & OMP_DECLARE_TARGET_DTOR) {
+        // Dtors are pushed in reverse order so they are executed from end
+        // to beginning when unregistering the library!
+        DP("Adding dtor " DPxMOD " to the pending list.\n",
+            DPxPTR(entry->addr));
+        Device.PendingCtorsDtors[desc].PendingDtors.push_front(entry->addr);
+      }
+
+      if (entry->flags & OMP_DECLARE_TARGET_LINK) {
+        DP("The \"link\" attribute is not yet supported!\n");
+      }
+    }
+    Device.PendingGlobalsMtx.unlock();
+  }
+}
+
+/// Registers the device images of desc: each image goes to the first RTL in
+/// AllRTLs that accepts it; that RTL's devices are set up on first use.
+void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
+  // Attempt to load all plugins available in the system.
+  std::call_once(initFlag, &RTLsTy::LoadRTLs, this);
+
+  RTLsMtx.lock();
+  // Register the images with the RTLs that understand them, if any.
+  for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
+    // Obtain the image.
+    __tgt_device_image *img = &desc->DeviceImages[i];
+    RTLInfoTy *FoundRTL = NULL;
+
+    // Scan the RTLs that have associated images until we find one that supports
+    // the current image.
+    for (auto &R : RTLs.AllRTLs) {
+      if (!R.is_valid_binary(img)) {
+        DP("Image " DPxMOD " is NOT compatible with RTL %s!\n",
+            DPxPTR(img->ImageStart), R.RTLName.c_str());
+        continue;
+      }
+
+      DP("Image " DPxMOD " is compatible with RTL %s!\n",
+          DPxPTR(img->ImageStart), R.RTLName.c_str());
+
+      // If this RTL is not already in use, initialize it.
+      if (!R.isUsed) {
+        // Initialize the device information for the RTL we are about to use.
+        DeviceTy device(&R);
+
+        size_t start = Devices.size();
+        Devices.resize(start + R.NumberOfDevices, device);
+        // Growing Devices may relocate its elements (assuming a std::vector),
+        // so re-point the DeviceTy pointers held by RTLs already in use.
+        for (auto *Used : RTLs.UsedRTLs)
+          for (size_t j = 0; j < Used->Devices.size(); ++j)
+            Used->Devices[j] = &Devices[Used->Idx + j];
+
+        for (int32_t id = 0; id < R.NumberOfDevices; ++id) {
+          DeviceTy &NewDevice = Devices[start + id];
+          NewDevice.DeviceID = start + id; // global device ID
+          NewDevice.RTLDeviceID = id;      // RTL local device ID
+          // Save pointer to device in RTL in case we want to unregister the RTL
+          R.Devices.push_back(&NewDevice);
+        }
+
+        // Initialize the index of this RTL and save it in the used RTLs.
+        R.Idx = (RTLs.UsedRTLs.empty())
+                    ? 0
+                    : RTLs.UsedRTLs.back()->Idx +
+                          RTLs.UsedRTLs.back()->NumberOfDevices;
+        assert((size_t) R.Idx == start &&
+            "RTL index should equal the number of devices used so far.");
+        R.isUsed = true;
+        RTLs.UsedRTLs.push_back(&R);
+
+        DP("RTL " DPxMOD " has index %d!\n", DPxPTR(R.LibraryHandler), R.Idx);
+      }
+
+      // Fetch this library's translation table, creating and initializing it
+      // on first use (operator[] default-constructs a missing entry).
+      TrlTblMtx.lock();
+      const bool IsNewTable =
+          !HostEntriesBeginToTransTable.count(desc->HostEntriesBegin);
+      TranslationTable &TransTable =
+          HostEntriesBeginToTransTable[desc->HostEntriesBegin];
+      if (IsNewTable) {
+        TransTable.HostTable.EntriesBegin = desc->HostEntriesBegin;
+        TransTable.HostTable.EntriesEnd = desc->HostEntriesEnd;
+      }
+
+      DP("Registering image " DPxMOD " with RTL %s!\n",
+          DPxPTR(img->ImageStart), R.RTLName.c_str());
+      RegisterImageIntoTranslationTable(TransTable, R, img);
+      TrlTblMtx.unlock();
+      FoundRTL = &R;
+
+      // Load ctors/dtors for static objects
+      RegisterGlobalCtorsDtorsForImage(desc, img, FoundRTL);
+
+      // if an RTL was found we are done - proceed to register the next image
+      break;
+    }
+
+    if (!FoundRTL) {
+      DP("No RTL found for image " DPxMOD "!\n", DPxPTR(img->ImageStart));
+    }
+  }
+  RTLsMtx.unlock();
+
+  DP("Done registering entries!\n");
+}
+
+/// Runs the pending destructors of desc on devices whose pending-ctor list is
+/// empty, then drops desc's host-pointer and translation-table entries.
+void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
+  DP("Unloading target library!\n");
+
+  RTLsMtx.lock();
+  // Find which RTL understands each image, if any.
+  for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
+    // Obtain the image.
+    __tgt_device_image *img = &desc->DeviceImages[i];
+    RTLInfoTy *FoundRTL = NULL;
+
+    // Scan the RTLs that have associated images until we find one that supports
+    // the current image. We only need to scan RTLs that are already being used.
+    for (auto *R : RTLs.UsedRTLs) {
+      assert(R->isUsed && "Expecting used RTLs.");
+
+      if (!R->is_valid_binary(img)) {
+        DP("Image " DPxMOD " is NOT compatible with RTL " DPxMOD "!\n",
+            DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler));
+        continue;
+      }
+
+      DP("Image " DPxMOD " is compatible with RTL " DPxMOD "!\n",
+          DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler));
+
+      FoundRTL = R;
+
+      // Execute dtors for static objects if the device has been used, i.e.
+      // if its PendingCtors list has been emptied.
+      for (int32_t j = 0; j < FoundRTL->NumberOfDevices; ++j) {
+        DeviceTy &Device = Devices[FoundRTL->Idx + j];
+        Device.PendingGlobalsMtx.lock();
+        if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) {
+          for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) {
+            int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, 1,
+                1, true /*team*/);
+            if (rc != OFFLOAD_SUCCESS) {
+              DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor));
+            }
+          }
+          // Remove this library's entry from PendingCtorsDtors
+          Device.PendingCtorsDtors.erase(desc);
+        }
+        Device.PendingGlobalsMtx.unlock();
+      }
+
+      DP("Unregistered image " DPxMOD " from RTL " DPxMOD "!\n",
+          DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler));
+
+      break;
+    }
+
+    // if no RTL was found proceed to unregister the next image
+    if (!FoundRTL){
+      DP("No RTLs in use support the image " DPxMOD "!\n",
+          DPxPTR(img->ImageStart));
+    }
+  }
+  RTLsMtx.unlock();
+  DP("Done unregistering images!\n");
+
+  // Remove entries from HostPtrToTableMap
+  TblMapMtx.lock();
+  for (__tgt_offload_entry *cur = desc->HostEntriesBegin;
+      cur < desc->HostEntriesEnd; ++cur) {
+    HostPtrToTableMap.erase(cur->addr);
+  }
+
+  // Remove translation table for this descriptor.
+  // NOTE(review): RegisterLib guards this table with TrlTblMtx; confirm.
+  auto tt = HostEntriesBeginToTransTable.find(desc->HostEntriesBegin);
+  if (tt != HostEntriesBeginToTransTable.end()) {
+    DP("Removing translation table for descriptor " DPxMOD "\n",
+        DPxPTR(desc->HostEntriesBegin));
+    HostEntriesBeginToTransTable.erase(tt);
+  } else {
+    DP("Translation table for descriptor " DPxMOD " cannot be found, probably "
+       "it has been already removed.\n", DPxPTR(desc->HostEntriesBegin));
+  }
+  TblMapMtx.unlock();
+
+  // TODO: Remove RTL and the devices it manages if it's not used anymore?
+  // TODO: Write some RTL->unload_image(...) function?
+
+  DP("Done unregistering library!\n");
+}