diff --git a/openmp/libomptarget/include/OffloadBinary.h b/openmp/libomptarget/include/OffloadBinary.h
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/include/OffloadBinary.h
@@ -0,0 +1,118 @@
+//===---------- OffloadBinary.h - Offload Binary Parser -- C++ ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains a generic class meant to parse data out of an input offloading
+// binary. This binary format embeds an executable or linkable image inside
+// along with some other metadata formatted as a string map.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPENMP_LIBOMPTARGET_INCLUDE_OFFLOADBINARY_H
+#define LLVM_OPENMP_LIBOMPTARGET_INCLUDE_OFFLOADBINARY_H
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+/// Non-owning parser for an offloading binary held in memory. The wrapped
+/// buffer must outlive the object, since every accessor returns pointers into
+/// it.
+class OffloadBinary {
+  struct Header {
+    uint8_t Magic[4] = {0x10, 0xFF, 0x10, 0xAD}; // 0x10FF10AD magic bytes.
+    uint32_t Version = 1;                        // Version identifier.
+    uint64_t Size;        // Size in bytes of this entire binary.
+    uint64_t EntryOffset; // Offset of the metadata entry in bytes.
+    uint64_t EntrySize;   // Size of the metadata entry in bytes.
+  };
+
+  struct Entry {
+    uint16_t TheImageKind;   // The kind of the image stored.
+    uint16_t TheOffloadKind; // The producer of this image.
+    uint32_t Flags;          // Additional flags associated with the image.
+    uint64_t StringOffset;   // Offset in bytes to the string map.
+    uint64_t NumStrings;     // Number of entries in the string map.
+    uint64_t ImageOffset;    // Offset in bytes of the actual binary image.
+    uint64_t ImageSize;      // Size in bytes of the binary image.
+  };
+
+  struct StringEntry {
+    uint64_t KeyOffset;   // Offset of the key in the string table.
+    uint64_t ValueOffset; // Offset of the value in the string table.
+  };
+
+public:
+  /// Returns true if \p Buffer is non-null and begins with the offload binary
+  /// magic bytes followed by a supported version. The caller must guarantee
+  /// that at least sizeof(Header) bytes are readable.
+  static bool isValid(char *Buffer) {
+    if (!Buffer)
+      return false;
+    // Compare raw bytes; the magic is binary data, not a C string.
+    return !std::memcmp(Buffer, "\x10\xFF\x10\xAD", sizeof(Header::Magic)) &&
+           reinterpret_cast<const Header *>(Buffer)->Version == 1;
+  }
+
+  /// Creates a parser over \p Buffer, or returns nullptr if \p Buffer does not
+  /// hold a valid offload binary. The buffer is not copied.
+  static std::unique_ptr<OffloadBinary> create(char *Buffer) {
+    if (!isValid(Buffer))
+      return nullptr;
+
+    const Header *TheHeader = reinterpret_cast<const Header *>(Buffer);
+    const Entry *TheEntry =
+        reinterpret_cast<const Entry *>(&Buffer[TheHeader->EntryOffset]);
+
+    // The constructor is private, so std::make_unique cannot be used here.
+    return std::unique_ptr<OffloadBinary>(
+        new OffloadBinary(Buffer, TheHeader, TheEntry));
+  }
+
+  uint16_t getImageKind() const { return TheEntry->TheImageKind; }
+  uint16_t getOffloadKind() const { return TheEntry->TheOffloadKind; }
+  uint32_t getVersion() const { return TheHeader->Version; }
+  uint32_t getFlags() const { return TheEntry->Flags; }
+  uint64_t getSize() const { return TheHeader->Size; }
+
+  const char *getTriple() const { return getString("triple"); }
+  const char *getArch() const { return getString("arch"); }
+  char *getImageStart() const { return &Buffer[TheEntry->ImageOffset]; }
+  char *getImageEnd() const { return getImageStart() + getImageSize(); }
+  uint64_t getImageSize() const { return TheEntry->ImageSize; }
+
+private:
+  OffloadBinary(char *Buffer, const Header *TheHeader, const Entry *TheEntry)
+      : Buffer(Buffer), TheHeader(TheHeader), TheEntry(TheEntry) {
+    const StringEntry *StringMapBegin =
+        reinterpret_cast<const StringEntry *>(&Buffer[TheEntry->StringOffset]);
+    for (uint64_t I = 0, E = TheEntry->NumStrings; I != E; ++I) {
+      const char *Key = &Buffer[StringMapBegin[I].KeyOffset];
+      StringData[Key] = &Buffer[StringMapBegin[I].ValueOffset];
+    }
+  }
+
+  /// Returns the value stored for \p Key, or an empty string if the key is
+  /// not present in the string map.
+  const char *getString(const char *Key) const {
+    auto It = StringData.find(Key);
+    return It != StringData.end() ? It->second : "";
+  }
+
+  /// Map from each metadata key to its value string inside the buffer.
+  std::unordered_map<std::string, const char *> StringData;
+  /// Raw pointer to the start of the binary.
+  char *Buffer;
+  /// Location of the header within the binary.
+  const Header *TheHeader;
+  /// Location of the metadata entries within the binary.
+  const Entry *TheEntry;
+};
+
+#endif // LLVM_OPENMP_LIBOMPTARGET_INCLUDE_OFFLOADBINARY_H
diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -479,6 +479,11 @@
   /// RTLs identified on the host
   RTLsTy RTLs;
 
+  /// Executable images extracted from the input images passed to the runtime.
+  /// Each image is allocated separately so that pointers handed out during
+  /// registration remain valid as more libraries are registered.
+  std::vector<std::unique_ptr<__tgt_device_image>> Images;
+
   /// Devices associated with RTLs
   std::vector<std::unique_ptr<DeviceTy>> Devices;
   std::mutex RTLsMtx; ///< For RTLs and Devices
diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -127,6 +127,11 @@
   __tgt_offload_entry *EntriesEnd;   // End of table (non inclusive)
 };
 
+/// This struct contains information about a given image.
+struct __tgt_image_info {
+  const char *Arch;
+};
+
 /// This struct is a record of all the host code that may be offloaded to a
 /// target.
 struct __tgt_bin_desc {
diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h
--- a/openmp/libomptarget/include/omptargetplugin.h
+++ b/openmp/libomptarget/include/omptargetplugin.h
@@ -29,7 +29,8 @@
 // result of __tgt__rtl__load__binary to NULL. However, this is meant to be a
 // lightweight query to determine if the RTL is suitable for an image without
 // having to load the library, which can be expensive.
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image);
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image,
+                                  __tgt_image_info *Info);
 
 // Return an integer other than zero if the data can be exchaned from SrcDevId
 // to DstDevId. If it is data exchangable, the device plugin should provide
diff --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h
--- a/openmp/libomptarget/include/rtl.h
+++ b/openmp/libomptarget/include/rtl.h
@@ -25,7 +25,7 @@
 struct __tgt_bin_desc;
 
 struct RTLInfoTy {
-  typedef int32_t(is_valid_binary_ty)(void *);
+  typedef int32_t(is_valid_binary_ty)(void *, void *);
   typedef int32_t(is_data_exchangable_ty)(int32_t, int32_t);
   typedef int32_t(number_of_devices_ty)();
   typedef int32_t(init_device_ty)(int32_t);
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1902,7 +1902,8 @@
 } // namespace core
 
 extern "C" {
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image,
+                                  __tgt_image_info *info) {
   return elf_machine_id_is_amdgcn(image);
 }
 
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -1483,7 +1483,8 @@
 extern "C" {
 #endif
 
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image,
+                                  __tgt_image_info *info) {
   return elf_check_machine(image, /* EM_CUDA */ 190);
 }
 
diff --git a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
--- a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
@@ -112,7 +112,8 @@
 extern "C" {
 #endif
 
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image,
+                                  __tgt_image_info *info) {
 // If we don't have a valid ELF ID we can just fail.
 #if TARGET_ELF_ID < 1
   return 0;
diff --git a/openmp/libomptarget/plugins/remote/src/rtl.cpp b/openmp/libomptarget/plugins/remote/src/rtl.cpp
--- a/openmp/libomptarget/plugins/remote/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/remote/src/rtl.cpp
@@ -49,7 +49,8 @@
   return Manager->unregisterLib(Desc);
 }
 
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image,
+                                  __tgt_image_info *Info) {
   return Manager->isValidBinary(Image);
 }
 
diff --git a/openmp/libomptarget/plugins/ve/src/rtl.cpp b/openmp/libomptarget/plugins/ve/src/rtl.cpp
--- a/openmp/libomptarget/plugins/ve/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/ve/src/rtl.cpp
@@ -184,7 +184,8 @@
 // result of __tgt__rtl__load__binary to NULL. However, this is meant to be a
 // lightweight query to determine if the RTL is suitable for an image without
 // having to load the library, which can be expensive.
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image,
+                                  __tgt_image_info *Info) {
 #if TARGET_ELF_ID < 1
   return 0;
 #else
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -14,6 +14,8 @@
 #include "device.h"
 #include "private.h"
 
+#include "OffloadBinary.h"
+
 #include <cassert>
 #include <cstdlib>
 #include <cstring>
@@ -278,6 +280,29 @@
   }
 }
 
+/// Returns the executable image embedded in \p Image when it is wrapped in the
+/// offload binary format, and a copy of \p Image itself otherwise. The entry
+/// table always comes from \p Image.
+static __tgt_device_image getExecutableImage(__tgt_device_image *Image) {
+  auto Binary = OffloadBinary::create(static_cast<char *>(Image->ImageStart));
+  if (!Binary)
+    return *Image;
+
+  return {Binary->getImageStart(), Binary->getImageEnd(), Image->EntriesBegin,
+          Image->EntriesEnd};
+}
+
+/// Returns the metadata stored alongside \p Image. The result is
+/// value-initialized (null Arch) when \p Image is not an offload binary;
+/// otherwise Arch points into the image buffer or at a string literal.
+static __tgt_image_info getImageInfo(__tgt_device_image *Image) {
+  auto Binary = OffloadBinary::create(static_cast<char *>(Image->ImageStart));
+  if (!Binary)
+    return __tgt_image_info{};
+
+  return __tgt_image_info{Binary->getArch()};
+}
+
 void RTLsTy::RegisterRequires(int64_t flags) {
   // TODO: add more elaborate check.
   // Minimal check: only set requires flags if previous value
@@ -354,16 +379,21 @@
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
   PM->RTLsMtx.lock();
   // Register the images with the RTLs that understand them, if any.
   for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
     // Obtain the image.
-    __tgt_device_image *img = &desc->DeviceImages[i];
+    // Each extracted image is heap-allocated and owned by PM->Images so that
+    // `img` stays valid when later registrations grow the container.
+    PM->Images.push_back(std::make_unique<__tgt_device_image>(
+        getExecutableImage(&desc->DeviceImages[i])));
+    __tgt_device_image *img = PM->Images.back().get();
+    __tgt_image_info info = getImageInfo(&desc->DeviceImages[i]);
 
     RTLInfoTy *FoundRTL = nullptr;
 
     // Scan the RTLs that have associated images until we find one that supports
     // the current image.
     for (auto &R : AllRTLs) {
-      if (!R.is_valid_binary(img)) {
+      if (!R.is_valid_binary(img, &info)) {
         DP("Image " DPxMOD " is NOT compatible with RTL %s!\n",
            DPxPTR(img->ImageStart), R.RTLName.c_str());
         continue;
@@ -417,7 +447,12 @@
   // Find which RTL understands each image, if any.
   for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
     // Obtain the image.
-    __tgt_device_image *img = &desc->DeviceImages[i];
+    // Re-extract the executable image rather than indexing PM->Images: that
+    // container accumulates images from every registered library, so index
+    // `i` of this descriptor does not identify the matching element.
+    __tgt_device_image Extracted = getExecutableImage(&desc->DeviceImages[i]);
+    __tgt_device_image *img = &Extracted;
+    __tgt_image_info info = getImageInfo(&desc->DeviceImages[i]);
 
     RTLInfoTy *FoundRTL = NULL;
 
@@ -428,6 +463,6 @@
       assert(R->isUsed && "Expecting used RTLs.");
 
-      if (!R->is_valid_binary(img)) {
+      if (!R->is_valid_binary(img, &info)) {
         DP("Image " DPxMOD " is NOT compatible with RTL " DPxMOD "!\n",
            DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler));
         continue;