diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -876,7 +876,7 @@ if (BitcodeOutput.size() != 1 || !SingleOutput) return createStringError(inconvertibleErrorCode(), "Cannot embed bitcode with multiple files."); - OutputFiles.push_back(static_cast(BitcodeOutput.front())); + OutputFiles.push_back(Args.MakeArgString(BitcodeOutput.front())); return Error::success(); } @@ -1188,7 +1188,8 @@ OffloadingImage TheImage{}; - TheImage.TheImageKind = IMG_Object; + TheImage.TheImageKind = + Args.hasArg(OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object; TheImage.TheOffloadKind = Kind; TheImage.StringData = { {"triple", Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_triple_EQ))}, diff --git a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt --- a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt +++ b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt @@ -33,6 +33,9 @@ # Define macro with the ELF ID for this target. 
add_definitions("-DTARGET_ELF_ID=${elf_machine_id}") + # Define the target triple + add_definitions("-DLIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE=\"${tmachine}\"") + add_llvm_library("omptarget.rtl.${tmachine_libname}.nextgen" SHARED diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt @@ -10,8 +10,18 @@ # ##===----------------------------------------------------------------------===## +include(GNUInstallDirs) + +set(LLVM_LINK_COMPONENTS + AllTargetsAsmParsers + AllTargetsCodeGens + AllTargetsDescs + AllTargetsInfos + LTO + ) + # Plugin Interface library. -add_library(PluginInterface OBJECT PluginInterface.cpp GlobalHandler.cpp) +add_llvm_library(PluginInterface OBJECT PluginInterface.cpp GlobalHandler.cpp JIT.cpp) # Define the TARGET_NAME. add_definitions("-DTARGET_NAME=PluginInterface") @@ -19,16 +29,13 @@ # Define the DEBUG_PREFIX. 
add_definitions(-DDEBUG_PREFIX="PluginInterface") +llvm_update_compile_flags(PluginInterface) + set_target_properties(PluginInterface PROPERTIES POSITION_INDEPENDENT_CODE ON CXX_VISIBILITY_PRESET protected) -llvm_update_compile_flags(PluginInterface) -set(LINK_LLVM_LIBS LLVMSupport) -if (LLVM_LINK_LLVM_DYLIB) - set(LINK_LLVM_LIBS LLVM) -endif() -target_link_libraries(PluginInterface INTERFACE ${LINK_LLVM_LIBS} PRIVATE elf_common MemoryManager) -add_dependencies(PluginInterface ${LINK_LLVM_LIBS}) + +target_link_libraries(PluginInterface PRIVATE elf_common MemoryManager) target_include_directories(PluginInterface INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(PluginInterface PRIVATE ${LIBOMPTARGET_INCLUDE_DIR}) diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h @@ -0,0 +1,49 @@ +//===- JIT.h - Target independent JIT infrastructure ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H +#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" + +#include +#include +#include + +struct __tgt_device_image; + +namespace llvm { +class MemoryBuffer; + +namespace omp { +namespace jit { + +/// Function type for a callback that will be called after the backend is +/// called. 
+using PostProcessingFn = + std::function>(StringRef)>; + +/// Check if \p Image contains bitcode with triple \p Triple. +bool checkBitcodeImage(__tgt_device_image *Image, std::string Triple); + +/// Compile the bitcode image \p Image and generate the binary image that can be +/// loaded to the target device of the triple \p Triple architecture \p Arch. \p +/// PostProcessing will be called after codegen to handle cases such as the +/// assembler being an external tool. +Expected<__tgt_device_image *> compile(__tgt_device_image *Image, + std::string Triple, std::string Arch, + int OptLevel, + PostProcessingFn PostProcessing); +} // namespace jit +} // namespace omp +} // namespace llvm + +#endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp @@ -0,0 +1,287 @@ +//===- JIT.cpp - Target independent JIT infrastructure --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "JIT.h" +#include "Debug.h" + +#include "omptarget.h" + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/CommandFlags.h" +#include "llvm/LTO/LTO.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/OffloadBinary.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/TimeProfiler.h" +#include "llvm/Target/TargetOptions.h" + +#include +#include + +using namespace llvm; +using namespace llvm::object; +using namespace omp; + +static codegen::RegisterCodeGenFlags RCGF; + +namespace { +std::once_flag InitFlag; + +void init() { + // Initialize the configured targets. 
+ InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); +} + +bool isValidCIdentifier(StringRef S) { + return !S.empty() && (isAlpha(S[0]) || S[0] == '_') && + llvm::all_of(llvm::drop_begin(S), + [](char C) { return C == '_' || isAlnum(C); }); +} + +/// Compiles one bitcode device image to target code via the LTO backend. +class JITEngine { +public: + JITEngine(std::string Triple, std::string Arch) : TT(Triple), Arch(Arch) {} + + ~JITEngine() { + if (!OutputFile.empty()) + sys::fs::remove(OutputFile); + } + + /// Compile \p Image and return the buffer produced by \p PostProcessing. + Expected> + run(__tgt_device_image *Image, int OptLevel, + jit::PostProcessingFn PostProcessing); + +private: + /// Wrap the bytes of \p Image in an OpenMP bitcode offload binary. + Expected createOffloadFile(__tgt_device_image *Image); + + /// Run LTO codegen on \p Input; returns the temporary output file's path. + Expected backend(OffloadFile &Input, int OptLevel); + + const llvm::Triple TT; + const std::string Arch; + SmallString<128> OutputFile; +}; + +Expected JITEngine::createOffloadFile(__tgt_device_image *Image) { + OffloadBinary::OffloadingImage OI; + OI.TheImageKind = IMG_Bitcode; + OI.TheOffloadKind = OFK_OpenMP; + OI.StringData = {{"triple", TT.str()}, {"arch", Arch}}; + + StringRef Data((const char *)Image->ImageStart, + (char *)Image->ImageEnd - (char *)Image->ImageStart); + std::unique_ptr MB = MemoryBuffer::getMemBuffer( + Data, + "jit-image-" + + std::to_string(reinterpret_cast(Image->ImageStart)), + /* RequiresNullTerminator */ false); + OI.Image = std::move(MB); + + std::unique_ptr Binary = OffloadBinary::write(OI); + auto NewBinaryOrErr = OffloadBinary::create(*Binary); + if (!NewBinaryOrErr) + return NewBinaryOrErr.takeError(); + return OffloadFile(std::move(*NewBinaryOrErr), std::move(Binary)); +} + +// NOTE: most of the code in this function is from +// clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp. 
+Expected JITEngine::backend(OffloadFile &InputFile, int OptLevel) { + lto::Config Conf; + + Conf.CPU = Arch; + Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(TT); + Conf.OptLevel = OptLevel; + Conf.UseDefaultPipeline = true; + Conf.DefaultTriple = TT.getTriple(); + Conf.PTO.LoopVectorization = Conf.OptLevel > 1; + Conf.PTO.SLPVectorization = Conf.OptLevel > 1; + Conf.CGFileType = TT.isNVPTX() ? CGFT_AssemblyFile : CGFT_ObjectFile; + Conf.HasWholeProgramVisibility = true; + + lto::ThinBackend Backend = + lto::createInProcessThinBackend(heavyweight_hardware_concurrency()); + auto LTOBackend = std::make_unique(std::move(Conf), Backend); + + BumpPtrAllocator Alloc; + StringSaver Saver(Alloc); + + StringRef Identifier = Saver.save( + InputFile.getBinary()->getMemoryBufferRef().getBufferIdentifier()); + MemoryBufferRef Buffer = + MemoryBufferRef(InputFile.getBinary()->getImage(), Identifier); + + Expected> BitcodeFileOrErr = + lto::InputFile::create(Buffer); + if (!BitcodeFileOrErr) + return BitcodeFileOrErr.takeError(); + + const auto Symbols = (*BitcodeFileOrErr)->symbols(); + + SmallVector Resolutions(Symbols.size()); + DenseSet PrevailingSymbols; + DenseSet UsedInRegularObj; + DenseSet UsedInSharedLib; + + size_t Idx = 0; + for (auto &Sym : Symbols) { + lto::SymbolResolution &Res = Resolutions[Idx++]; + + Res.Prevailing = !Sym.isUndefined() && + PrevailingSymbols.insert(Saver.save(Sym.getName())).second; + + Res.VisibleToRegularObj = + UsedInRegularObj.contains(Sym.getName()) || + isValidCIdentifier(Sym.getSectionName()) || + (Res.Prevailing && + (Sym.getVisibility() != GlobalValue::HiddenVisibility && + !Sym.canBeOmittedFromSymbolTable())); + + Res.ExportDynamic = Sym.getVisibility() != GlobalValue::HiddenVisibility && + (UsedInSharedLib.contains(Sym.getName()) || + !Sym.canBeOmittedFromSymbolTable()); + + Res.FinalDefinitionInLinkageUnit = + Sym.getVisibility() != GlobalValue::DefaultVisibility && + (!Sym.isUndefined() && !Sym.isCommon()); + + 
Res.LinkerRedefined = false; + } + + // Add the bitcode file with its resolved symbols to the LTO job. + if (Error Err = LTOBackend->add(std::move(*BitcodeFileOrErr), Resolutions)) + return Err; + + assert(LTOBackend->getMaxTasks() == 1); + + if (std::error_code EC = sys::fs::createTemporaryFile( + "libomptarget-jit-" + TT.str() + "-" + Arch, + TT.isNVPTX() ? ".s" : ".o", OutputFile)) + return createFileError(OutputFile, EC); + + auto AddStream = + [&](size_t Task, + const Twine &ModuleName) -> std::unique_ptr { + int FD = -1; + if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD)) + return nullptr; + return std::make_unique( + std::make_unique(FD, true)); + }; + + if (Error Err = LTOBackend->run(AddStream)) + return Err; + + return OutputFile.str(); +} + +Expected> +JITEngine::run(__tgt_device_image *Image, int OptLevel, + jit::PostProcessingFn PostProcessing) { + auto FileOrErr = createOffloadFile(Image); + if (!FileOrErr) + return FileOrErr.takeError(); + + auto OutputFileOrErr = backend(*FileOrErr, OptLevel); + if (!OutputFileOrErr) + return OutputFileOrErr.takeError(); + + return PostProcessing(*OutputFileOrErr); +} + +/// A map from a bitcode image start address to its corresponding triple. If the +/// image is not in the map, it is not a bitcode image. +DenseMap BitcodeImageMap; + +/// Output images generated from LLVM backend. +std::list> JITImages; + +/// A list of __tgt_device_image images. 
+std::list<__tgt_device_image> TgtImages; +} // namespace + +namespace llvm { +namespace omp { +namespace jit { +bool checkBitcodeImage(__tgt_device_image *Image, std::string Triple) { + TimeTraceScope TimeScope("Check bitcode image"); + + std::call_once(InitFlag, init); + + { + auto Itr = BitcodeImageMap.find(Image->ImageStart); + if (Itr != BitcodeImageMap.end() && + StringRef(Itr->second).starts_with(Triple)) + return true; + } + + StringRef Data((const char *)Image->ImageStart, + (char *)Image->ImageEnd - (char *)Image->ImageStart); + std::unique_ptr MB = + MemoryBuffer::getMemBuffer(Data, "", /* RequiresNullTerminator */ false); + if (!MB) + return false; + + Expected FOrErr = object::readIRSymtab(*MB); + if (!FOrErr) { + consumeError(FOrErr.takeError()); + return false; + } + + auto ActualTriple = FOrErr->TheReader.getTargetTriple(); + + if (ActualTriple.starts_with(Triple)) { + BitcodeImageMap[Image->ImageStart] = ActualTriple; + return true; + } + + return false; +} + +Expected<__tgt_device_image *> compile(__tgt_device_image *Image, + std::string Triple, std::string Arch, + int OptLevel, + PostProcessingFn PostProcessing) { + JITEngine J(Triple, Arch); + + auto ImageMBOrErr = J.run(Image, OptLevel, PostProcessing); + if (!ImageMBOrErr) + return ImageMBOrErr.takeError(); + + JITImages.push_back(std::move(*ImageMBOrErr)); + TgtImages.push_back(*Image); + + auto &ImageMB = JITImages.back(); + auto *NewImage = &TgtImages.back(); + + NewImage->ImageStart = (void *)ImageMB->getBufferStart(); + NewImage->ImageEnd = (void *)ImageMB->getBufferEnd(); + + return NewImage; +} + +} // namespace jit +} // namespace omp +} // namespace llvm diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h 
@@ -372,6 +372,20 @@ } uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; } + /// Get target architecture. + virtual std::string getArch() const { + llvm_unreachable("device doesn't support JIT"); + } + + /// Post processing after jit backend. Since the output of LTO backend is + /// written to file, the only argument here is the temporary file name. It is + /// expected to return a memory buffer that contains the binary image. Targets + /// like NVPTX can use this function to call the actual assembler to get the + /// actual device image; otherwise the default implementation of this function + /// simply reads the file. + virtual Expected> + doJITPostProcessing(StringRef FileName) const; + private: /// Register offload entry for global variable. Error registerGlobalOffloadEntry(DeviceImageTy &DeviceImage, @@ -482,6 +496,11 @@ /// Get the ELF code to recognize the binary image of this plugin. virtual uint16_t getMagicElfBits() const = 0; + /// Get the target triple of this plugin. + virtual std::string getTriple() const { + llvm_unreachable("target doesn't support jit"); + } + /// Allocate a structure using the internal allocator. 
template Ty *allocate() { return reinterpret_cast(Allocator.Allocate(sizeof(Ty), alignof(Ty))); diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -11,6 +11,7 @@ #include "PluginInterface.h" #include "Debug.h" #include "GlobalHandler.h" +#include "JIT.h" #include "elf_common.h" #include "omptarget.h" #include "omptargetplugin.h" @@ -554,6 +555,14 @@ return Plugin::success(); } +Expected> +GenericDeviceTy::doJITPostProcessing(StringRef FileName) const { + auto MBOrErr = MemoryBuffer::getFile(FileName); + if (std::error_code EC = MBOrErr.getError()) + return createFileError(FileName, EC); + return std::move(*MBOrErr); +} + /// Exposed library API function, basically wrappers around the GenericDeviceTy /// functionality with the same name. All non-async functions are redirected /// to the async versions right away with a NULL AsyncInfoPtr. @@ -583,7 +592,10 @@ if (!Plugin::isActive()) return false; - return elf_check_machine(TgtImage, Plugin::get().getMagicElfBits()); + if (elf_check_machine(TgtImage, Plugin::get().getMagicElfBits())) + return true; + + return jit::checkBitcodeImage(TgtImage, Plugin::get().getTriple()); } int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *TgtImage, @@ -654,7 +666,36 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId, __tgt_device_image *TgtImage) { GenericPluginTy &Plugin = Plugin::get(); - auto TableOrErr = Plugin.getDevice(DeviceId).loadBinary(Plugin, TgtImage); + GenericDeviceTy &Device = Plugin.getDevice(DeviceId); + + // If it is a bitcode image, we have to jit the binary image before loading to + // the device. 
+ { + std::string Triple = Plugin.getTriple(); + + jit::PostProcessingFn PostProcessing = + [&Device]( + StringRef FileName) -> Expected> { + return Device.doJITPostProcessing(FileName); + }; + + if (jit::checkBitcodeImage(TgtImage, Triple)) { + // Query the arch only here: getArch() is unreachable on non-JIT devices. + auto TgtImageOrErr = jit::compile(TgtImage, Triple, Device.getArch(), + /* OptLevel */ 3, PostProcessing); + if (!TgtImageOrErr) { + auto Err = TgtImageOrErr.takeError(); + REPORT("Failure to jit binary image from bitcode image %p on device " + "%d: %s\n", + TgtImage, DeviceId, toString(std::move(Err)).data()); + return nullptr; + } + + TgtImage = *TgtImageOrErr; + } + } + + auto TableOrErr = Device.loadBinary(Plugin, TgtImage); if (!TableOrErr) { auto Err = TableOrErr.takeError(); REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage, diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -278,6 +278,14 @@ GridValues.GV_Warp_Size)) return Err; + if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, + ComputeCapability.Major)) + return Err; + + if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, + ComputeCapability.Minor)) + return Err; + return Plugin::success(); } @@ -776,6 +784,8 @@ return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"); } + std::string getArch() const override { return ComputeCapability.str(); } + private: using CUDAStreamManagerTy = GenericDeviceResourceManagerTy; using CUDAEventManagerTy = GenericDeviceResourceManagerTy; @@ -792,6 +802,15 @@ /// The CUDA device handler. 
CUdevice Device = CU_DEVICE_INVALID; + + /// Compute capability of the device; str() formats {7, 5} as "sm_75". + struct ComputeCapabilityTy { + uint32_t Major; + uint32_t Minor; + std::string str() const { + return "sm_" + std::to_string(Major * 10 + Minor); + } + } ComputeCapability; }; Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice, @@ -890,6 +909,8 @@ /// Get the ELF code for recognizing the compatible image binary. uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; } + std::string getTriple() const override { return "nvptx"; } + /// Check whether the image is compatible with the available CUDA devices. Expected isImageCompatible(__tgt_image_info *Info) const override { for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) { diff --git a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp @@ -358,6 +358,10 @@ Expected isImageCompatible(__tgt_image_info *Info) const override { return true; } + + std::string getTriple() const override { + return LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE; + } }; GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }