diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -363,9 +363,10 @@
 /// generic kernel class.
 struct AMDGPUKernelTy : public GenericKernelTy {
   /// Create an AMDGPU kernel with a name and an execution mode.
-  AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
+  AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode,
+                 uint16_t CodeObjectVersion)
       : GenericKernelTy(Name, ExecutionMode),
-        ImplicitArgsSize(sizeof(utils::AMDGPUImplicitArgsTy)) {}
+        ImplicitArgsSize(utils::getImplicitArgsSize(CodeObjectVersion)) {}
 
   /// Initialize the AMDGPU kernel.
   Error initImpl(GenericDeviceTy &Device, DeviceImageTy &Image) override {
@@ -1652,9 +1653,16 @@
       return Plugin::error("Invalid execution mode %d for '%s'",
                            ExecModeGlobal.getValue(), KernelEntry.name);
 
+    // Read the code object version from the image's ELF header; it determines
+    // the size of the kernel's implicit arguments.
+    uint16_t Cov = 0;
+    if (auto Err = GHandler.readCOVFromELFHeader(*this, Image, Cov))
+      return std::move(Err);
+
     // Allocate and initialize the AMDGPU kernel.
     AMDGPUKernelTy *AMDKernel = Plugin::get().allocate<AMDGPUKernelTy>();
-    new (AMDKernel) AMDGPUKernelTy(KernelEntry.name, ExecModeGlobal.getValue());
+    new (AMDKernel)
+        AMDGPUKernelTy(KernelEntry.name, ExecModeGlobal.getValue(), Cov);
 
     return AMDKernel;
   }
@@ -2396,9 +2404,7 @@
       return Err;
 
     // Initialize implicit arguments.
-    utils::AMDGPUImplicitArgsTy *ImplArgs =
-        reinterpret_cast<utils::AMDGPUImplicitArgsTy *>(
-            static_cast<char *>(AllArgs) + KernelArgsSize);
+    uint8_t *ImplArgs = static_cast<uint8_t *>(AllArgs) + KernelArgsSize;
 
     // Initialize the implicit arguments to zero.
     std::memset(ImplArgs, 0, ImplicitArgsSize);
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -24,19 +24,13 @@
 namespace plugin {
 namespace utils {
 
-// The implicit arguments of AMDGPU kernels.
-struct AMDGPUImplicitArgsTy {
-  uint64_t OffsetX;
-  uint64_t OffsetY;
-  uint64_t OffsetZ;
-  uint64_t HostcallPtr;
-  uint64_t Unused0;
-  uint64_t Unused1;
-  uint64_t Unused2;
-};
-
-static_assert(sizeof(AMDGPUImplicitArgsTy) == 56,
-              "Unexpected size of implicit arguments");
+/// Return the size in bytes of the implicit kernel arguments for the given ELF
+/// ABI \p Version: 56 bytes before code object v5, 256 bytes from v5 onwards.
+/// Marked inline so that including this header from several translation units
+/// does not produce multiple definitions.
+inline uint16_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5 ? 56 : 256;
+}
 
 /// Parse a TargetID to get processor arch and feature map.
 /// Returns processor subarch.
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.h
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.h
@@ -130,6 +130,11 @@
   Error readGlobalFromImage(GenericDeviceTy &Device, DeviceImageTy &Image,
                             const GlobalTy &HostGlobal);
 
+  /// Read the code object version of \p Image from the ABI version byte of its
+  /// ELF header and store it in \p CodeObjectVersion.
+  Error readCOVFromELFHeader(GenericDeviceTy &Device, DeviceImageTy &Image,
+                             uint16_t &CodeObjectVersion);
+
   /// Get the address and size of a global from the device. Address is return in
   /// \p DeviceGlobal, the global name and expected size are passed in
   /// \p DeviceGlobal.
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.cpp
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.cpp
@@ -20,6 +20,7 @@
 using namespace omp;
 using namespace target;
 using namespace plugin;
+using namespace llvm::ELF;
 
 const ELF64LEObjectFile *
 GenericGlobalHandlerTy::getOrCreateELFObjectFile(const GenericDeviceTy &Device,
@@ -136,6 +137,27 @@
   return getGlobalMetadataFromELF(Image, **SymOrErr, **SecOrErr, ImageGlobal);
 }
 
+Error GenericGlobalHandlerTy::readCOVFromELFHeader(
+    GenericDeviceTy &Device, DeviceImageTy &Image,
+    uint16_t &CodeObjectVersion) {
+  // Get the ELF object file for the image. Notice the ELF object may already
+  // be created in previous calls, so we can reuse it.
+  const ELF64LEObjectFile *ELFObj = getOrCreateELFObjectFile(Device, Image);
+  if (!ELFObj)
+    return Plugin::error("Unable to create ELF object for image %p",
+                         Image.getStart());
+
+  // The version is stored in the EI_ABIVERSION byte of the header's e_ident.
+  CodeObjectVersion = static_cast<uint8_t>(
+      ELFObj->getELFFile().getHeader().e_ident[EI_ABIVERSION]);
+  // A value of zero is rejected and reported as an error.
+  if (CodeObjectVersion == 0)
+    return Plugin::error("Failed to get code object version from the ELF "
+                         "header\n");
+
+  return Plugin::success();
+}
+
 Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device,
                                                   DeviceImageTy &Image,
                                                   const GlobalTy &HostGlobal) {
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
--- a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
@@ -57,6 +57,4 @@
   COV5_HEAPV1_PTR_SIZE = 8
 };
 
-const uint16_t getImplicitArgsSize(uint16_t Version);
-
 #endif