diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -479,6 +479,9 @@ /// RTLs identified on the host RTLsTy RTLs; + /// Device binaries parsed from the device images of registered libraries. + std::vector<__tgt_device_binary> Binaries; + /// Devices associated with RTLs std::vector> Devices; std::mutex RTLsMtx; ///< For RTLs and Devices diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -127,6 +127,21 @@ __tgt_offload_entry *EntriesEnd; // End of table (non inclusive) }; +/// This struct represents per-target information passed to the plugins. +struct __tgt_image_info { + const char *Arch; // Value of the image's "arch" string entry, null if it has none +}; + +/// This struct is a record of the device binary parsed from a registered +/// __tgt_device_image, together with the metadata passed to the plugins. +struct __tgt_device_binary { + void *ImageStart; // Pointer to the target code start + void *ImageEnd; // Pointer to the target code end + __tgt_offload_entry *EntriesBegin; // Begin of table with all target entries + __tgt_offload_entry *EntriesEnd; // End of table (non inclusive) + __tgt_image_info Info; // Struct containing metadata +}; + /// This struct is a record of all the host code that may be offloaded to a /// target. struct __tgt_bin_desc { diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h --- a/openmp/libomptarget/include/omptargetplugin.h +++ b/openmp/libomptarget/include/omptargetplugin.h @@ -29,7 +29,7 @@ // result of __tgt__rtl__load__binary to NULL. However, this is meant to be a // lightweight query to determine if the RTL is suitable for an image without // having to load the library, which can be expensive.
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image); +int32_t __tgt_rtl_is_valid_binary(__tgt_device_binary *Image); // Return an integer other than zero if the data can be exchaned from SrcDevId // to DstDevId. If it is data exchangable, the device plugin should provide @@ -58,7 +58,7 @@ // Individual entries in the table may also be NULL, when the corresponding // offload region is not supported on the target device. __tgt_target_table *__tgt_rtl_load_binary(int32_t ID, - __tgt_device_image *Image); + __tgt_device_binary *Image); // Allocate data on the particular target device, of the specified size. // HostPtr is a address of the host data the allocated target data diff --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h --- a/openmp/libomptarget/include/rtl.h +++ b/openmp/libomptarget/include/rtl.h @@ -166,7 +166,7 @@ __tgt_target_table HostTable; // Image assigned to a given device. - std::vector<__tgt_device_image *> TargetsImages; // One image per device ID. + std::vector<__tgt_device_binary *> TargetsBinaries; // One per device ID. // Table of entry points or NULL if it was not already computed. std::vector<__tgt_target_table *> TargetsTable; // One table per device ID. 
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -1557,7 +1557,7 @@ return OFFLOAD_SUCCESS; } -bool elf_machine_id_is_amdgcn(__tgt_device_image *image) { +bool elf_machine_id_is_amdgcn(__tgt_device_binary *image) { const uint16_t amdgcnMachineID = 224; // EM_AMDGPU may not be in system elf.h int32_t r = elf_check_machine(image, amdgcnMachineID); if (!r) { @@ -1566,7 +1566,7 @@ return r; } -uint32_t elf_e_flags(__tgt_device_image *image) { +uint32_t elf_e_flags(__tgt_device_binary *image) { char *img_begin = (char *)image->ImageStart; size_t img_size = (char *)image->ImageEnd - img_begin; @@ -1790,11 +1790,11 @@ symbol_info si; bool valid = false; - __tgt_device_image *image; + __tgt_device_binary *image; const size_t img_size; device_environment(int device_id, int number_devices, int dynamic_mem_size, - __tgt_device_image *image, const size_t img_size) + __tgt_device_binary *image, const size_t img_size) : image(image), img_size(img_size) { host_device_env.NumDevices = number_devices; @@ -1902,7 +1902,7 @@ } // namespace core extern "C" { -int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) { +int32_t __tgt_rtl_is_valid_binary(__tgt_device_binary *image) { return elf_machine_id_is_amdgcn(image); } @@ -2078,10 +2078,10 @@ } static __tgt_target_table * -__tgt_rtl_load_binary_locked(int32_t device_id, __tgt_device_image *image); +__tgt_rtl_load_binary_locked(int32_t device_id, __tgt_device_binary *image); __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, - __tgt_device_image *image) { + __tgt_device_binary *image) { DeviceInfo.load_run_lock.lock(); __tgt_target_table *res = __tgt_rtl_load_binary_locked(device_id, image); DeviceInfo.load_run_lock.unlock(); @@ -2089,7 +2089,7 @@ } __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id, - __tgt_device_image *image) { + 
__tgt_device_binary *image) { // This function loads the device image onto gpu[device_id] and does other // per-image initialization work. Specifically: // diff --git a/openmp/libomptarget/plugins/common/elf_common/elf_common.h b/openmp/libomptarget/plugins/common/elf_common/elf_common.h --- a/openmp/libomptarget/plugins/common/elf_common/elf_common.h +++ b/openmp/libomptarget/plugins/common/elf_common/elf_common.h @@ -18,10 +18,11 @@ /// Return non-zero, if the given \p image is an ELF object, which /// e_machine matches \p target_id; return zero otherwise. -EXTERN int32_t elf_check_machine(__tgt_device_image *image, uint16_t target_id); +EXTERN int32_t elf_check_machine(__tgt_device_binary *image, + uint16_t target_id); /// Return non-zero, if the given \p image is an ET_DYN ELF object; /// return zero otherwise. -EXTERN int32_t elf_is_dynamic(__tgt_device_image *image); +EXTERN int32_t elf_is_dynamic(__tgt_device_binary *image); #endif // LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_ELF_COMMON_ELF_COMMON_H diff --git a/openmp/libomptarget/plugins/common/elf_common/elf_common.cpp b/openmp/libomptarget/plugins/common/elf_common/elf_common.cpp --- a/openmp/libomptarget/plugins/common/elf_common/elf_common.cpp +++ b/openmp/libomptarget/plugins/common/elf_common/elf_common.cpp @@ -33,7 +33,8 @@ /// created from this range, otherwise, return 0. /// If \p Callback is invoked, then return whatever value \p Callback returns. 
template -static int32_t withBytesAsElf(char *BytesBegin, char *BytesEnd, F Callback) { +static int32_t withBytesAsElf(const char *BytesBegin, const char *BytesEnd, + F Callback) { size_t Size = BytesEnd - BytesBegin; StringRef StrBuf(BytesBegin, Size); @@ -67,22 +68,22 @@ } // Check whether an image is valid for execution on target_id -int32_t elf_check_machine(__tgt_device_image *image, uint16_t target_id) { +int32_t elf_check_machine(__tgt_device_binary *image, uint16_t target_id) { auto CheckMachine = [target_id](const ELFObjectFileBase *Object) { return target_id == Object->getEMachine(); }; - return withBytesAsElf(reinterpret_cast(image->ImageStart), - reinterpret_cast(image->ImageEnd), + return withBytesAsElf(reinterpret_cast(image->ImageStart), + reinterpret_cast(image->ImageEnd), CheckMachine); } -int32_t elf_is_dynamic(__tgt_device_image *image) { +int32_t elf_is_dynamic(__tgt_device_binary *image) { auto CheckDynType = [](const ELFObjectFileBase *Object) { uint16_t Type = Object->getEType(); DP("ELF Type: %d\n", Type); return Type == ET_DYN; }; - return withBytesAsElf(reinterpret_cast(image->ImageStart), - reinterpret_cast(image->ImageEnd), + return withBytesAsElf(reinterpret_cast(image->ImageStart), + reinterpret_cast(image->ImageEnd), CheckDynType); } diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -789,7 +789,7 @@ } __tgt_target_table *loadBinary(const int DeviceId, - const __tgt_device_image *Image) { + const __tgt_device_binary *Image) { // Clear the offload table as we are going to create a new one. 
clearOffloadEntriesTable(DeviceId); @@ -1483,7 +1483,7 @@ extern "C" { #endif -int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) { +int32_t __tgt_rtl_is_valid_binary(__tgt_device_binary *image) { return elf_check_machine(image, /* EM_CUDA */ 190); } @@ -1518,7 +1518,7 @@ } __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, - __tgt_device_image *image) { + __tgt_device_binary *image) { assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS) diff --git a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp --- a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp +++ b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp @@ -112,7 +112,7 @@ extern "C" { #endif -int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) { +int32_t __tgt_rtl_is_valid_binary(__tgt_device_binary *image) { // If we don't have a valid ELF ID we can just fail. #if TARGET_ELF_ID < 1 return 0; @@ -126,7 +126,7 @@ int32_t __tgt_rtl_init_device(int32_t device_id) { return OFFLOAD_SUCCESS; } __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, - __tgt_device_image *image) { + __tgt_device_binary *image) { DP("Dev %d: load binary from " DPxMOD " image\n", device_id, DPxPTR(image->ImageStart)); diff --git a/openmp/libomptarget/plugins/remote/include/Utils.h b/openmp/libomptarget/plugins/remote/include/Utils.h --- a/openmp/libomptarget/plugins/remote/include/Utils.h +++ b/openmp/libomptarget/plugins/remote/include/Utils.h @@ -88,7 +88,7 @@ /// track of already copied device images. void unloadTargetBinaryDescription( const TargetBinaryDescription *Request, __tgt_bin_desc *Desc, - std::unordered_map + std::unordered_map &HostToRemoteDeviceImage); /// Frees argument as constructed by loadTargetBinaryDescription @@ -113,7 +113,7 @@ /// Loads tgt_target_table into a TargetTable protobuf message. 
void loadTargetTable(__tgt_target_table *Table, TargetTable &TableResponse, - __tgt_device_image *Image); + __tgt_device_binary *Image); /// Unloads from a target_table from protobuf. void unloadTargetTable( @@ -127,7 +127,7 @@ void dump(__tgt_offload_entry *Entry); void dump(TargetOffloadEntry Entry); void dump(__tgt_target_table *Table); -void dump(__tgt_device_image *Image); +void dump(__tgt_device_binary *Image); } // namespace RemoteOffloading #endif diff --git a/openmp/libomptarget/plugins/remote/lib/Utils.cpp b/openmp/libomptarget/plugins/remote/lib/Utils.cpp --- a/openmp/libomptarget/plugins/remote/lib/Utils.cpp +++ b/openmp/libomptarget/plugins/remote/lib/Utils.cpp @@ -32,7 +32,7 @@ } // Copy Device Images and Device Offload Entries - __tgt_device_image *CurImage = Desc->DeviceImages; + __tgt_device_binary *CurImage = Desc->DeviceImages; for (auto I = 0; I < Desc->NumDeviceImages; I++, CurImage++) { auto *Image = Request.add_images(); auto Size = (char *)CurImage->ImageEnd - (char *)CurImage->ImageStart; @@ -65,11 +65,11 @@ void unloadTargetBinaryDescription( const TargetBinaryDescription *Request, __tgt_bin_desc *Desc, - std::unordered_map + std::unordered_map &HostToRemoteDeviceImage) { std::unordered_map CopiedOffloadEntries; Desc->NumDeviceImages = Request->images_size(); - Desc->DeviceImages = new __tgt_device_image[Desc->NumDeviceImages]; + Desc->DeviceImages = new __tgt_device_binary[Desc->NumDeviceImages]; if (Request->entries_size()) Desc->HostEntriesBegin = new __tgt_offload_entry[Request->entries_size()]; @@ -90,7 +90,7 @@ Desc->HostEntriesEnd = CurEntry; // Copy Device Images and Device Offload Entries - __tgt_device_image *CurImage = Desc->DeviceImages; + __tgt_device_binary *CurImage = Desc->DeviceImages; auto ImageItr = Request->image_ptrs().begin(); for (auto Image : Request->images()) { // Copy Device Offload Entries @@ -134,7 +134,7 @@ } void freeTargetBinaryDescription(__tgt_bin_desc *Desc) { - __tgt_device_image *CurImage = 
Desc->DeviceImages; + __tgt_device_binary *CurImage = Desc->DeviceImages; for (auto I = 0; I < Desc->NumDeviceImages; I++, CurImage++) delete[](uint64_t *) CurImage->ImageStart; @@ -157,7 +157,7 @@ } void loadTargetTable(__tgt_target_table *Table, TargetTable &TableResponse, - __tgt_device_image *Image) { + __tgt_device_binary *Image) { auto *ImageEntry = Image->EntriesBegin; for (__tgt_offload_entry *CurEntry = Table->EntriesBegin; CurEntry != Table->EntriesEnd; CurEntry++, ImageEntry++) { @@ -279,7 +279,7 @@ static_cast((Entry.data().c_str() + Entry.data().size()))); } -void dump(__tgt_device_image *Image) { +void dump(__tgt_device_binary *Image) { dump(Image->ImageStart, Image->ImageEnd); __tgt_offload_entry *EntryItr = Image->EntriesBegin; for (; EntryItr != Image->EntriesEnd; EntryItr++) diff --git a/openmp/libomptarget/plugins/remote/server/Server.h b/openmp/libomptarget/plugins/remote/server/Server.h --- a/openmp/libomptarget/plugins/remote/server/Server.h +++ b/openmp/libomptarget/plugins/remote/server/Server.h @@ -38,7 +38,7 @@ private: int32_t mapHostRTLDeviceId(int32_t RTLDeviceID); - std::unordered_map + std::unordered_map HostToRemoteDeviceImage; std::unordered_map> Descriptions; diff --git a/openmp/libomptarget/plugins/remote/server/Server.cpp b/openmp/libomptarget/plugins/remote/server/Server.cpp --- a/openmp/libomptarget/plugins/remote/server/Server.cpp +++ b/openmp/libomptarget/plugins/remote/server/Server.cpp @@ -71,7 +71,7 @@ Status RemoteOffloadImpl::IsValidBinary(ServerContext *Context, const TargetDeviceImagePtr *DeviceImage, I32 *IsValid) { - __tgt_device_image *Image = + __tgt_device_binary *Image = HostToRemoteDeviceImage[(void *)DeviceImage->image_ptr()]; IsValid->set_number(0); @@ -126,7 +126,7 @@ Status RemoteOffloadImpl::LoadBinary(ServerContext *Context, const Binary *Binary, TargetTable *Reply) { - __tgt_device_image *Image = + __tgt_device_binary *Image = HostToRemoteDeviceImage[(void *)Binary->image_ptr()]; Table = 
PM->Devices[Binary->device_id()]->RTL->load_binary( diff --git a/openmp/libomptarget/plugins/remote/src/Client.h b/openmp/libomptarget/plugins/remote/src/Client.h --- a/openmp/libomptarget/plugins/remote/src/Client.h +++ b/openmp/libomptarget/plugins/remote/src/Client.h @@ -69,13 +69,13 @@ int32_t registerLib(__tgt_bin_desc *Desc); int32_t unregisterLib(__tgt_bin_desc *Desc); - int32_t isValidBinary(__tgt_device_image *Image); + int32_t isValidBinary(__tgt_device_binary *Image); int32_t getNumberOfDevices(); int32_t initDevice(int32_t DeviceId); int32_t initRequires(int64_t RequiresFlags); - __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image); + __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_binary *Image); void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr); int32_t dataDelete(int32_t DeviceId, void *TgtPtr); @@ -124,13 +124,13 @@ int32_t registerLib(__tgt_bin_desc *Desc); int32_t unregisterLib(__tgt_bin_desc *Desc); - int32_t isValidBinary(__tgt_device_image *Image); + int32_t isValidBinary(__tgt_device_binary *Image); int32_t getNumberOfDevices(); int32_t initDevice(int32_t DeviceId); int32_t initRequires(int64_t RequiresFlags); - __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image); + __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_binary *Image); void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr); int32_t dataDelete(int32_t DeviceId, void *TgtPtr); diff --git a/openmp/libomptarget/plugins/remote/src/Client.cpp b/openmp/libomptarget/plugins/remote/src/Client.cpp --- a/openmp/libomptarget/plugins/remote/src/Client.cpp +++ b/openmp/libomptarget/plugins/remote/src/Client.cpp @@ -110,7 +110,7 @@ /* Error Value */ 1); } -int32_t RemoteOffloadClient::isValidBinary(__tgt_device_image *Image) { +int32_t RemoteOffloadClient::isValidBinary(__tgt_device_binary *Image) { return remoteCall( /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { @@ -209,8 +209,8 @@ /* Error 
Value */ -1); } -__tgt_target_table *RemoteOffloadClient::loadBinary(int32_t DeviceId, - __tgt_device_image *Image) { +__tgt_target_table * +RemoteOffloadClient::loadBinary(int32_t DeviceId, __tgt_device_binary *Image) { return remoteCall( /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { @@ -593,7 +593,7 @@ return Ret; } -int32_t RemoteClientManager::isValidBinary(__tgt_device_image *Image) { +int32_t RemoteClientManager::isValidBinary(__tgt_device_binary *Image) { int32_t ClientIdx = 0; for (auto &Client : Clients) { if (auto Ret = Client.isValidBinary(Image)) @@ -637,8 +637,8 @@ return RequiresFlags; } -__tgt_target_table *RemoteClientManager::loadBinary(int32_t DeviceId, - __tgt_device_image *Image) { +__tgt_target_table * +RemoteClientManager::loadBinary(int32_t DeviceId, __tgt_device_binary *Image) { int32_t ClientIdx, DeviceIdx; std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); return Clients[ClientIdx].loadBinary(DeviceIdx, Image); diff --git a/openmp/libomptarget/plugins/remote/src/rtl.cpp b/openmp/libomptarget/plugins/remote/src/rtl.cpp --- a/openmp/libomptarget/plugins/remote/src/rtl.cpp +++ b/openmp/libomptarget/plugins/remote/src/rtl.cpp @@ -49,7 +49,7 @@ return Manager->unregisterLib(Desc); } -int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { +int32_t __tgt_rtl_is_valid_binary(__tgt_device_binary *Image) { return Manager->isValidBinary(Image); } @@ -64,8 +64,8 @@ } __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId, - __tgt_device_image *Image) { - return Manager->loadBinary(DeviceId, (__tgt_device_image *)Image); + __tgt_device_binary *Image) { + return Manager->loadBinary(DeviceId, (__tgt_device_binary *)Image); } int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId) { diff --git a/openmp/libomptarget/plugins/ve/src/rtl.cpp b/openmp/libomptarget/plugins/ve/src/rtl.cpp --- a/openmp/libomptarget/plugins/ve/src/rtl.cpp +++ b/openmp/libomptarget/plugins/ve/src/rtl.cpp @@ -184,7 +184,7 @@ // result of 
__tgt__rtl__load__binary to NULL. However, this is meant to be a // lightweight query to determine if the RTL is suitable for an image without // having to load the library, which can be expensive. -int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { +int32_t __tgt_rtl_is_valid_binary(__tgt_device_binary *Image) { #if TARGET_ELF_ID < 1 return 0; #else @@ -213,7 +213,7 @@ // Individual entries in the table may also be NULL, when the corresponding // offload region is not supported on the target device. __tgt_target_table *__tgt_rtl_load_binary(int32_t ID, - __tgt_device_image *Image) { + __tgt_device_binary *Image) { DP("Dev %d: load binary from " DPxMOD " image\n", ID, DPxPTR(Image->ImageStart)); diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -99,9 +99,9 @@ } // 1) get image. - assert(TransTable->TargetsImages.size() > (size_t)device_id && + assert(TransTable->TargetsBinaries.size() > (size_t)device_id && "Not expecting a device ID outside the table's bounds!"); - __tgt_device_image *img = TransTable->TargetsImages[device_id]; + __tgt_device_binary *img = TransTable->TargetsBinaries[device_id]; if (!img) { REPORT("No image loaded for device id %d.\n", device_id); rc = OFFLOAD_FAIL; @@ -114,7 +114,7 @@ if (!TargetTable) { REPORT("Unable to generate entries table for device id %d.\n", device_id); - TransTable->TargetsImages[device_id] = 0; + TransTable->TargetsBinaries[device_id] = 0; rc = OFFLOAD_FAIL; break; } @@ -129,7 +129,7 @@ REPORT( "Host and Target tables mismatch for device id %d [%zx != %zx].\n", device_id, hsize, tsize); - TransTable->TargetsImages[device_id] = 0; + TransTable->TargetsBinaries[device_id] = 0; TransTable->TargetsTable[device_id] = 0; rc = OFFLOAD_FAIL; break; diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ 
b/openmp/libomptarget/src/rtl.cpp @@ -220,10 +220,10 @@ static void RegisterImageIntoTranslationTable(TranslationTable &TT, RTLInfoTy &RTL, - __tgt_device_image *image) { + __tgt_device_binary *img) { // same size, as when we increase one, we also increase the other. - assert(TT.TargetsTable.size() == TT.TargetsImages.size() && + assert(TT.TargetsTable.size() == TT.TargetsBinaries.size() && "We should have as many images as we have tables!"); // Resize the Targets Table and Images to accommodate the new targets if @@ -231,15 +231,15 @@ unsigned TargetsTableMinimumSize = RTL.Idx + RTL.NumberOfDevices; if (TT.TargetsTable.size() < TargetsTableMinimumSize) { - TT.TargetsImages.resize(TargetsTableMinimumSize, 0); + TT.TargetsBinaries.resize(TargetsTableMinimumSize, 0); TT.TargetsTable.resize(TargetsTableMinimumSize, 0); } // Register the image in all devices for this target type. for (int32_t i = 0; i < RTL.NumberOfDevices; ++i) { // If we are changing the image we are also invalidating the target table. - if (TT.TargetsImages[RTL.Idx + i] != image) { - TT.TargetsImages[RTL.Idx + i] = image; + if (TT.TargetsBinaries[RTL.Idx + i] != img) { + TT.TargetsBinaries[RTL.Idx + i] = img; TT.TargetsTable[RTL.Idx + i] = 0; // lazy initialization of target table. } } @@ -249,7 +249,7 @@ // Functionality for registering Ctors/Dtors static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc, - __tgt_device_image *img, + __tgt_device_binary *img, RTLInfoTy *RTL) { for (int32_t i = 0; i < RTL->NumberOfDevices; ++i) {
@@ -278,6 +278,97 @@ } }
+/// Translate a registered \p Image into the descriptor handed to the plugins.
+///
+/// An image that starts with the 0x10FF10AD magic bytes is decoded as a
+/// version 1 offloading container: the result then spans the embedded device
+/// code and Info.Arch refers to the value stored under the "arch" key (it
+/// stays null if there is no such key). Every other image, including a
+/// container with another version or with offsets that leave
+/// [ImageStart, ImageEnd), is forwarded unchanged with a null Info.Arch.
+///
+/// The offload entry table is always taken from \p Image, and all returned
+/// pointers alias the memory of \p Image; nothing is copied.
+static __tgt_device_binary parseDeviceImage(__tgt_device_image *Image) {
+  struct Header {
+    uint8_t Magic[4] = {0x10, 0xFF, 0x10, 0xAD}; // 0x10FF10AD magic bytes.
+    uint32_t Version = 1;                        // Version identifier.
+    uint64_t Size;        // Size in bytes of this entire binary.
+    uint64_t EntryOffset; // Offset of the metadata entry in bytes.
+    uint64_t EntrySize;   // Size of the metadata entry in bytes.
+  };
+
+  struct Entry {
+    uint16_t TheImageKind;   // The kind of the image stored.
+    uint16_t TheOffloadKind; // The producer of this image.
+    uint32_t Flags;          // Additional flags associated with the image.
+    uint64_t StringOffset;   // Offset in bytes to the string map.
+    uint64_t NumStrings;     // Number of entries in the string map.
+    uint64_t ImageOffset;    // Offset in bytes of the actual binary image.
+    uint64_t ImageSize;      // Size in bytes of the binary image.
+  };
+
+  struct StringEntry {
+    uint64_t KeyOffset;   // Offset of the key in the string table.
+    uint64_t ValueOffset; // Offset of the value in the string table.
+  };
+
+  char *Buffer = reinterpret_cast<char *>(Image->ImageStart);
+  char *BufferEnd = reinterpret_cast<char *>(Image->ImageEnd);
+  const uint64_t Size = BufferEnd > Buffer ? BufferEnd - Buffer : 0;
+
+  // Start from a plain copy of the image; it is returned as-is whenever the
+  // container cannot be decoded.
+  __tgt_device_binary Binary{};
+  Binary.ImageStart = Image->ImageStart;
+  Binary.ImageEnd = Image->ImageEnd;
+  Binary.EntriesBegin = Image->EntriesBegin;
+  Binary.EntriesEnd = Image->EntriesEnd;
+
+  // True if the Length bytes starting at Offset all lie inside the image.
+  auto Fits = [Size](uint64_t Offset, uint64_t Length) {
+    return Offset <= Size && Length <= Size - Offset;
+  };
+
+  // The magic is raw bytes rather than a C string, so compare it with memcmp,
+  // and only once a complete header is known to be available.
+  if (!Fits(0, sizeof(Header)) ||
+      memcmp(Buffer, "\x10\xFF\x10\xAD", sizeof(Header::Magic)) != 0)
+    return Binary;
+
+  // Copy the records out of the buffer instead of casting into it: nothing
+  // here guarantees that the image is suitably aligned for these structs.
+  Header TheHeader;
+  memcpy(&TheHeader, Buffer, sizeof(Header));
+  // Checked at run time so that release builds reject unknown layouts too.
+  if (TheHeader.Version != 1 || !Fits(TheHeader.EntryOffset, sizeof(Entry)))
+    return Binary;
+
+  Entry TheEntry;
+  memcpy(&TheEntry, &Buffer[TheHeader.EntryOffset], sizeof(Entry));
+  if (!Fits(TheEntry.ImageOffset, TheEntry.ImageSize) ||
+      TheEntry.NumStrings > Size / sizeof(StringEntry) ||
+      !Fits(TheEntry.StringOffset, TheEntry.NumStrings * sizeof(StringEntry)))
+    return Binary;
+
+  for (uint64_t I = 0, E = TheEntry.NumStrings; I != E; ++I) {
+    StringEntry Str;
+    memcpy(&Str, &Buffer[TheEntry.StringOffset + I * sizeof(StringEntry)],
+           sizeof(StringEntry));
+    // Ignore keys and values that are not NUL-terminated inside the image.
+    if (!Fits(Str.KeyOffset, 1) || !Fits(Str.ValueOffset, 1) ||
+        !memchr(&Buffer[Str.KeyOffset], '\0', Size - Str.KeyOffset) ||
+        !memchr(&Buffer[Str.ValueOffset], '\0', Size - Str.ValueOffset))
+      continue;
+    if (strcmp(&Buffer[Str.KeyOffset], "arch") == 0)
+      Binary.Info.Arch = &Buffer[Str.ValueOffset];
+  }
+
+  Binary.ImageStart = &Buffer[TheEntry.ImageOffset];
+  Binary.ImageEnd = &Buffer[TheEntry.ImageOffset] + TheEntry.ImageSize;
+  return Binary;
+}
+
 void RTLsTy::RegisterRequires(int64_t flags) { // TODO: add more elaborate check.
// Minimal check: only set requires flags if previous value
@@ -353,10 +412,18 @@
 
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
   PM->RTLsMtx.lock();
+
+  // Make sure that the binaries added below keep their addresses by reserving
+  // room for them on top of the ones that are already registered.
+  PM->Binaries.reserve(PM->Binaries.size() + desc->NumDeviceImages);
+  // NOTE(review): this reserve may itself reallocate and so invalidate
+  // pointers to binaries of an earlier registration -- confirm whether
+  // PM->Binaries needs a container with stable element addresses.
+
   // Register the images with the RTLs that understand them, if any.
   for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
-    // Obtain the image.
-    __tgt_device_image *img = &desc->DeviceImages[i];
+    PM->Binaries.emplace_back(parseDeviceImage(&desc->DeviceImages[i]));
+    __tgt_device_binary *img = &PM->Binaries.back();
 
     RTLInfoTy *FoundRTL = nullptr;
 
@@ -415,9 +482,18 @@
 
   PM->RTLsMtx.lock();
   // Find which RTL understands each image, if any.
   for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
-    // Obtain the image.
-    __tgt_device_image *img = &desc->DeviceImages[i];
+    // Obtain the binary that was parsed out of this image. PM->Binaries also
+    // holds the binaries of every other library, so only one whose code lies
+    // inside this image is used; the most recently added match wins.
+    __tgt_device_image *DescImage = &desc->DeviceImages[i];
+    __tgt_device_binary *img = nullptr;
+    for (__tgt_device_binary &Binary : PM->Binaries)
+      if (Binary.ImageStart >= DescImage->ImageStart &&
+          Binary.ImageStart < DescImage->ImageEnd)
+        img = &Binary;
+    if (!img)
+      continue;
 
     RTLInfoTy *FoundRTL = NULL;
 