diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -28,10 +29,17 @@ #include "Utilities.h" #include "UtilitiesRTL.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { namespace omp { @@ -1519,6 +1527,11 @@ if (auto Err = initMemoryPools()) return Err; + char GPUName[64]; + if (auto Err = getDeviceAttr(HSA_AGENT_INFO_NAME, GPUName)) + return Err; + Arch = GPUName; + // Get the wavefront size. uint32_t WavefrontSize = 0; if (auto Err = getDeviceAttr(HSA_AGENT_INFO_WAVEFRONT_SIZE, WavefrontSize)) @@ -1626,6 +1639,61 @@ return Plugin::success(); } + Expected> + doJITPostProcessing(std::unique_ptr MB) const override { + + // TODO: We should try to avoid materialization but there seems to be no + // good linker interface w/o file i/o. + SmallString<128> LinkerOutputFilePath; + std::error_code EC = sys::fs::createTemporaryFile( + "amdgpu-pre-link-jit", ".out", LinkerOutputFilePath); + if (EC) + return createStringError(EC, + "Failed to create temporary file for linker"); + + SmallString<128> LinkerInputFilePath = LinkerOutputFilePath; + LinkerInputFilePath.pop_back_n(2); + + auto FD = raw_fd_ostream(LinkerInputFilePath.data(), EC); + if (EC) + return createStringError(EC, "Failed to open temporary file for linker"); + FD.write(MB->getBufferStart(), MB->getBufferSize()); + FD.close(); + + const auto &ErrorOrPath = sys::findProgramByName("lld"); + if (!ErrorOrPath) + return createStringError(inconvertibleErrorCode(), + "Failed to find `lld` on the PATH."); + + std::string LLDPath = ErrorOrPath.get(); + INFO(OMP_INFOTYPE_PLUGIN_KERNEL, getDeviceId(), + "Using `%s` to link JITed amdgcn ouput.", LLDPath.c_str()); + + std::string MCPU = "-plugin-opt=mcpu=" + getArch(); + + StringRef Args[] = {LLDPath, + "-flavor", + "gnu", + "--no-undefined", + "-shared", + MCPU, + "-o", + LinkerOutputFilePath.data(), + LinkerInputFilePath.data()}; + + std::string Error; + int RC = sys::ExecuteAndWait(LLDPath, Args, std::nullopt, {}, 0, 0, &Error); + if (RC) + return createStringError(inconvertibleErrorCode(), + "Linking optimized bitcode failed: %s", + Error.c_str()); + + return std::move( + MemoryBuffer::getFileOrSTDIN(LinkerOutputFilePath.data()).get()); + } + + std::string getArch() const override { return Arch; } + /// Allocate and construct an AMDGPU kernel. Expected constructKernelEntry(const __tgt_offload_entry &KernelEntry, @@ -2027,6 +2095,9 @@ /// The agent handler corresponding to the device. hsa_agent_t Agent; + /// The GPU architecture. + std::string Arch; + /// Reference to the host device. AMDHostDeviceTy &HostDevice; @@ -2255,6 +2326,8 @@ return Plugin::check(Status, "Error in hsa_shut_down: %s"); } + Triple::ArchType getTripleArch() const override { return Triple::amdgcn; } + /// Get the ELF code for recognizing the compatible image binary. uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; }