diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt
--- a/openmp/libomptarget/CMakeLists.txt
+++ b/openmp/libomptarget/CMakeLists.txt
@@ -50,6 +50,7 @@
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-pc-linux-gnu-LTO")
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda")
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-LTO")
+set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-JIT-LTO")

 # Once the plugins for the different targets are validated, they will be added to
 # the list of supported targets in the current system.
@@ -73,7 +74,7 @@
 # Follow host OMPT support and check if host support has been requested.
 # LIBOMP_HAVE_OMPT_SUPPORT indicates whether host OMPT support has been implemented.
 # LIBOMP_OMPT_SUPPORT indicates whether host OMPT support has been requested (default is ON).
-# LIBOMPTARGET_OMPT_SUPPORT indicates whether target OMPT support has been requested (default is ON).
+# LIBOMPTARGET_OMPT_SUPPORT indicates whether target OMPT support has been requested (default is ON).
 set(OMPT_TARGET_DEFAULT FALSE)
 if ((LIBOMP_HAVE_OMPT_SUPPORT) AND (LIBOMP_OMPT_SUPPORT) AND (NOT WIN32))
   set (OMPT_TARGET_DEFAULT TRUE)
diff --git a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
--- a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
@@ -33,6 +33,9 @@
     # Define macro with the ELF ID for this target.
     add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")

+    # Define target triple
+    add_definitions("-DLIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE=${tmachine}")
+
     add_llvm_library("omptarget.rtl.${tmachine_libname}.nextgen"
       SHARED
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
@@ -12,7 +12,14 @@
 # NOTE: Don't try to build `PluginInterface` using `add_llvm_library` because we
 # don't want to export `PluginInterface` while `add_llvm_library` requires that.
-add_library(PluginInterface OBJECT PluginInterface.cpp GlobalHandler.cpp)
+add_library(PluginInterface OBJECT
+  PluginInterface.cpp GlobalHandler.cpp JIT.cpp)
+
+# Only enable JIT for those targets that LLVM can support.
+string(TOUPPER "${LLVM_TARGETS_TO_BUILD}" TargetsSupported)
+foreach(Target ${TargetsSupported})
+  target_compile_definitions(PluginInterface PRIVATE "LIBOMPTARGET_JIT_${Target}")
+endforeach()

 # This is required when using LLVM libraries.
 llvm_update_compile_flags(PluginInterface)
@@ -20,7 +27,31 @@
 if (LLVM_LINK_LLVM_DYLIB)
   set(llvm_libs LLVM)
 else()
-  llvm_map_components_to_libnames(llvm_libs Support)
+  llvm_map_components_to_libnames(llvm_libs
+    ${LLVM_TARGETS_TO_BUILD}
+    AggressiveInstCombine
+    Analysis
+    BinaryFormat
+    BitReader
+    BitWriter
+    CodeGen
+    Core
+    Extensions
+    InstCombine
+    Instrumentation
+    IPO
+    IRReader
+    Linker
+    MC
+    Object
+    Passes
+    Remarks
+    ScalarOpts
+    Support
+    Target
+    TransformUtils
+    Vectorize
+    )
 endif()

 target_link_libraries(PluginInterface
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
@@ -0,0 +1,50 @@
+//===- JIT.h - Target independent JIT infrastructure ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
+#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Error.h"
+
+#include <functional>
+#include <memory>
+#include <string>
+
+struct __tgt_device_image;
+
+namespace llvm {
+class MemoryBuffer;
+
+namespace omp {
+namespace jit {
+
+/// Function type for a callback that will be called after the backend is
+/// called.
+using PostProcessingFn = std::function<Expected<std::unique_ptr<MemoryBuffer>>(
+    std::unique_ptr<MemoryBuffer>)>;
+
+/// Check if \p Image contains bitcode with triple \p Triple.
+bool checkBitcodeImage(__tgt_device_image *Image, Triple::ArchType TA);
+
+/// Compile the bitcode image \p Image and generate the binary image that can
+/// be loaded to the target device with triple \p Triple and architecture \p
+/// MCpu. \p PostProcessing will be called after codegen to handle cases such
+/// as running an external assembler.
+Expected<__tgt_device_image *> compile(__tgt_device_image *Image,
+                                       Triple::ArchType TA, std::string MCpu,
+                                       unsigned OptLevel,
+                                       PostProcessingFn PostProcessing);
+} // namespace jit
+} // namespace omp
+} // namespace llvm
+
+#endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
@@ -0,0 +1,375 @@
+//===- JIT.cpp - Target independent JIT infrastructure --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "Debug.h"
+
+#include "omptarget.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LLVMRemarkStreamer.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include <mutex>
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace omp;
+
+static codegen::RegisterCodeGenFlags RCGF;
+
+namespace {
+std::once_flag InitFlag;
+
+void init(Triple TT) {
+  bool JITTargetInitialized = false;
+#ifdef LIBOMPTARGET_JIT_NVPTX
+  if (TT.isNVPTX()) {
+    LLVMInitializeNVPTXTargetInfo();
+    LLVMInitializeNVPTXTarget();
+    LLVMInitializeNVPTXTargetMC();
+    LLVMInitializeNVPTXAsmPrinter();
+    JITTargetInitialized = true;
+  }
+#endif
+#ifdef LIBOMPTARGET_JIT_AMDGPU
+  if (TT.isAMDGPU()) {
+    LLVMInitializeAMDGPUTargetInfo();
+    LLVMInitializeAMDGPUTarget();
+    LLVMInitializeAMDGPUTargetMC();
+    LLVMInitializeAMDGPUAsmPrinter();
+    JITTargetInitialized = true;
+  }
+#endif
+  if (!JITTargetInitialized) {
+    FAILURE_MESSAGE("unsupported JIT target");
+    abort();
+  }
+
+  // Initialize passes
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeCore(Registry);
+  initializeScalarOpts(Registry);
+  initializeVectorization(Registry);
+  initializeIPO(Registry);
+  initializeAnalysis(Registry);
+  initializeTransformUtils(Registry);
+  initializeInstCombine(Registry);
+  initializeTarget(Registry);
+
+  initializeExpandLargeDivRemLegacyPassPass(Registry);
+  initializeExpandLargeFpConvertLegacyPassPass(Registry);
+  initializeExpandMemCmpPassPass(Registry);
+  initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
+  initializeSelectOptimizePass(Registry);
+  initializeCodeGenPreparePass(Registry);
+  initializeAtomicExpandPass(Registry);
+  initializeRewriteSymbolsLegacyPassPass(Registry);
+  initializeWinEHPreparePass(Registry);
+  initializeDwarfEHPrepareLegacyPassPass(Registry);
+  initializeSafeStackLegacyPassPass(Registry);
+  initializeSjLjEHPreparePass(Registry);
+  initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
+  initializeGlobalMergePass(Registry);
+  initializeIndirectBrExpandPassPass(Registry);
+  initializeInterleavedLoadCombinePass(Registry);
+  initializeInterleavedAccessPass(Registry);
+  initializeUnreachableBlockElimLegacyPassPass(Registry);
+  initializeExpandReductionsPass(Registry);
+  initializeExpandVectorPredicationPass(Registry);
+  initializeWasmEHPreparePass(Registry);
+  initializeWriteBitcodePassPass(Registry);
+  initializeHardwareLoopsPass(Registry);
+  initializeTypePromotionPass(Registry);
+  initializeReplaceWithVeclibLegacyPass(Registry);
+  initializeJMCInstrumenterPass(Registry);
+}
+
+Expected<std::unique_ptr<Module>>
+createModuleFromImage(__tgt_device_image *Image, LLVMContext &Context) {
+  StringRef Data((const char *)Image->ImageStart,
+                 (char *)Image->ImageEnd - (char *)Image->ImageStart);
+  std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
+      Data, /* BufferName */ "", /* RequiresNullTerminator */ false);
+  SMDiagnostic Err;
+  auto Mod = parseIR(*MB, Err, Context);
+  if (!Mod)
+    return make_error<StringError>("Failed to create module",
+                                   inconvertibleErrorCode());
+  return Mod;
+}
+
+CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) {
+  switch (OptLevel) {
+  case 0:
+    return CodeGenOpt::None;
+  case 1:
+    return CodeGenOpt::Less;
+  case 2:
+    return CodeGenOpt::Default;
+  case 3:
+    return CodeGenOpt::Aggressive;
+  }
+  llvm_unreachable("Invalid optimization level");
+}
+
+OptimizationLevel getOptLevel(unsigned OptLevel) {
+  switch (OptLevel) {
+  case 0:
+    return OptimizationLevel::O0;
+  case 1:
+    return OptimizationLevel::O1;
+  case 2:
+    return OptimizationLevel::O2;
+  case 3:
+    return OptimizationLevel::O3;
+  }
+  llvm_unreachable("Invalid optimization level");
+}
+
+Expected<std::unique_ptr<TargetMachine>>
+createTargetMachine(Module &M, std::string CPU, unsigned OptLevel) {
+  Triple TT(M.getTargetTriple());
+  CodeGenOpt::Level CGOptLevel = getCGOptLevel(OptLevel);
+
+  std::string Msg;
+  const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg);
+  if (!T)
+    return make_error<StringError>(Msg, inconvertibleErrorCode());
+
+  SubtargetFeatures Features;
+  Features.getDefaultSubtargetFeatures(TT);
+
+  std::optional<Reloc::Model> RelocModel;
+  if (M.getModuleFlag("PIC Level"))
+    RelocModel =
+        M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
+
+  std::optional<CodeModel::Model> CodeModel = M.getCodeModel();
+
+  TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(TT);
+
+  std::unique_ptr<TargetMachine> TM(
+      T->createTargetMachine(M.getTargetTriple(), CPU, Features.getString(),
+                             Options, RelocModel, CodeModel, CGOptLevel));
+  if (!TM)
+    return make_error<StringError>("Failed to create target machine",
+                                   inconvertibleErrorCode());
+  return TM;
+}
+
+/// A JIT engine that compiles a bitcode device image for a single target.
+class JITEngine {
+public:
+  JITEngine(Triple::ArchType TA, std::string MCpu)
+      : TT(Triple::getArchTypeName(TA)), CPU(MCpu) {
+    std::call_once(InitFlag, init, TT);
+  }
+
+  /// Run JIT compilation. It is expected to return a memory buffer containing
+  /// the generated device image that can be loaded to the device directly.
+  Expected<std::unique_ptr<MemoryBuffer>>
+  run(__tgt_device_image *Image, unsigned OptLevel,
+      jit::PostProcessingFn PostProcessing);
+
+private:
+  /// Run the backend, which contains optimization and code generation.
+  Expected<std::unique_ptr<MemoryBuffer>> backend(Module &M, unsigned OptLevel);
+
+  /// Run the optimization pipeline.
+  void opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
+           unsigned OptLevel);
+
+  /// Run code generation.
+  void codegen(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
+               raw_pwrite_stream &OS);
+
+  LLVMContext Context;
+  const Triple TT;
+  const std::string CPU;
+};
+
+void JITEngine::opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
+                    unsigned OptLevel) {
+  PipelineTuningOptions PTO;
+  std::optional<PGOOptions> PGOOpt;
+
+  LoopAnalysisManager LAM;
+  FunctionAnalysisManager FAM;
+  CGSCCAnalysisManager CGAM;
+  ModuleAnalysisManager MAM;
+  ModulePassManager MPM;
+
+  PassBuilder PB(TM, PTO, PGOOpt, nullptr);
+
+  FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
+  // Register all the basic analyses with the managers.
+  PB.registerModuleAnalyses(MAM);
+  PB.registerCGSCCAnalyses(CGAM);
+  PB.registerFunctionAnalyses(FAM);
+  PB.registerLoopAnalyses(LAM);
+  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+
+  MPM.addPass(PB.buildPerModuleDefaultPipeline(getOptLevel(OptLevel)));
+
+  MPM.run(M, MAM);
+}
+
+void JITEngine::codegen(TargetMachine *TM, TargetLibraryInfoImpl *TLII,
+                        Module &M, raw_pwrite_stream &OS) {
+  legacy::PassManager PM;
+  PM.add(new TargetLibraryInfoWrapperPass(*TLII));
+  MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(
+      reinterpret_cast<LLVMTargetMachine *>(TM));
+  TM->addPassesToEmitFile(PM, OS, nullptr,
+                          TT.isNVPTX() ? CGFT_AssemblyFile : CGFT_ObjectFile,
+                          /* DisableVerify */ false, MMIWP);
+
+  PM.run(M);
+}
+
+Expected<std::unique_ptr<MemoryBuffer>> JITEngine::backend(Module &M,
+                                                           unsigned OptLevel) {
+
+  auto RemarksFileOrErr = setupLLVMOptimizationRemarks(
+      Context, /* RemarksFilename */ "", /* RemarksPasses */ "",
+      /* RemarksFormat */ "", /* RemarksWithHotness */ false);
+  if (Error E = RemarksFileOrErr.takeError())
+    return std::move(E);
+  if (*RemarksFileOrErr)
+    (*RemarksFileOrErr)->keep();
+
+  auto TMOrErr = createTargetMachine(M, CPU, OptLevel);
+  if (!TMOrErr)
+    return TMOrErr.takeError();
+
+  std::unique_ptr<TargetMachine> TM = std::move(*TMOrErr);
+  TargetLibraryInfoImpl TLII(TT);
+
+  opt(TM.get(), &TLII, M, OptLevel);
+
+  // Prepare the output buffer and stream for codegen.
+  SmallVector<char> CGOutputBuffer;
+  raw_svector_ostream OS(CGOutputBuffer);
+
+  codegen(TM.get(), &TLII, M, OS);
+
+  return MemoryBuffer::getMemBufferCopy(OS.str());
+}
+
+Expected<std::unique_ptr<MemoryBuffer>>
+JITEngine::run(__tgt_device_image *Image, unsigned OptLevel,
+               jit::PostProcessingFn PostProcessing) {
+  auto ModOrErr = createModuleFromImage(Image, Context);
+  if (!ModOrErr)
+    return ModOrErr.takeError();
+
+  auto Mod = std::move(*ModOrErr);
+
+  auto MBOrError = backend(*Mod, OptLevel);
+  if (!MBOrError)
+    return MBOrError.takeError();
+
+  return PostProcessing(std::move(*MBOrError));
+}
+
+/// A map from a bitcode image start address to its corresponding triple. If
+/// the image is not in the map, it is not a bitcode image.
+DenseMap<void *, Triple::ArchType> BitcodeImageMap;
+
+/// Output images generated from the LLVM backend.
+SmallVector<std::unique_ptr<MemoryBuffer>, 4> JITImages;
+
+/// A list of __tgt_device_image images.
+std::list<__tgt_device_image> TgtImages;
+} // namespace
+
+namespace llvm {
+namespace omp {
+namespace jit {
+bool checkBitcodeImage(__tgt_device_image *Image, Triple::ArchType TA) {
+  TimeTraceScope TimeScope("Check bitcode image");
+
+  {
+    auto Itr = BitcodeImageMap.find(Image->ImageStart);
+    if (Itr != BitcodeImageMap.end() && Itr->second == TA)
+      return true;
+  }
+
+  StringRef Data(reinterpret_cast<const char *>(Image->ImageStart),
+                 reinterpret_cast<const char *>(Image->ImageEnd) -
+                     reinterpret_cast<const char *>(Image->ImageStart));
+  std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
+      Data, /* BufferName */ "", /* RequiresNullTerminator */ false);
+  if (!MB)
+    return false;
+
+  Expected<IRSymtabFile> FOrErr = object::readIRSymtab(*MB);
+  if (!FOrErr) {
+    consumeError(FOrErr.takeError());
+    return false;
+  }
+
+  auto ActualTriple = FOrErr->TheReader.getTargetTriple();
+
+  if (Triple(ActualTriple).getArch() == TA) {
+    BitcodeImageMap[Image->ImageStart] = TA;
+    return true;
+  }
+
+  return false;
+}
+
+Expected<__tgt_device_image *> compile(__tgt_device_image *Image,
+                                       Triple::ArchType TA, std::string MCPU,
+                                       unsigned OptLevel,
+                                       PostProcessingFn PostProcessing) {
+  JITEngine J(TA, MCPU);
+
+  auto ImageMBOrErr = J.run(Image, OptLevel, PostProcessing);
+  if (!ImageMBOrErr)
+    return ImageMBOrErr.takeError();
+
+  JITImages.push_back(std::move(*ImageMBOrErr));
+  TgtImages.push_back(*Image);
+
+  auto &ImageMB = JITImages.back();
+  auto *NewImage = &TgtImages.back();
+
+  NewImage->ImageStart = (void *)ImageMB->getBufferStart();
+  NewImage->ImageEnd = (void *)ImageMB->getBufferEnd();
+
+  return NewImage;
+}
+
+} // namespace jit
+} // namespace omp
+} // namespace llvm
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -26,6 +26,7 @@
 #include "omptarget.h"

 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/Support/Allocator.h"
@@ -377,6 +378,17 @@
   }
   uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }

+  /// Get the target architecture.
+  virtual std::string getArch() const {
+    llvm_unreachable("device doesn't support JIT");
+  }
+
+  /// Post processing after JIT backend. The ownership of \p MB will be taken.
+  virtual Expected<std::unique_ptr<MemoryBuffer>>
+  doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const {
+    return MB;
+  }
+
 private:
   /// Register offload entry for global variable.
   Error registerGlobalOffloadEntry(DeviceImageTy &DeviceImage,
@@ -526,6 +538,11 @@
   /// Get the ELF code to recognize the binary image of this plugin.
   virtual uint16_t getMagicElfBits() const = 0;

+  /// Get the target triple of this plugin.
+  virtual Triple::ArchType getTripleArch() const {
+    llvm_unreachable("target doesn't support JIT");
+  }
+
   /// Allocate a structure using the internal allocator.
   template <typename Ty> Ty *allocate() {
     return reinterpret_cast<Ty *>(Allocator.Allocate(sizeof(Ty), alignof(Ty)));
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -11,6 +11,7 @@
 #include "PluginInterface.h"
 #include "Debug.h"
 #include "GlobalHandler.h"
+#include "JIT.h"
 #include "elf_common.h"
 #include "omptarget.h"
 #include "omptargetplugin.h"
@@ -629,7 +630,10 @@
   if (!Plugin::isActive())
     return false;

-  return elf_check_machine(TgtImage, Plugin::get().getMagicElfBits());
+  if (elf_check_machine(TgtImage, Plugin::get().getMagicElfBits()))
+    return true;
+
+  return jit::checkBitcodeImage(TgtImage, Plugin::get().getTripleArch());
 }

 int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *TgtImage,
@@ -700,7 +704,37 @@
 __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
                                           __tgt_device_image *TgtImage) {
   GenericPluginTy &Plugin = Plugin::get();
-  auto TableOrErr = Plugin.getDevice(DeviceId).loadBinary(Plugin, TgtImage);
+  GenericDeviceTy &Device = Plugin.getDevice(DeviceId);
+
+  // If it is a bitcode image, we have to JIT compile it before loading it to
+  // the device.
+  {
+    UInt32Envar JITOptLevel("LIBOMPTARGET_JIT_OPT_LEVEL", 3);
+    Triple::ArchType TA = Plugin.getTripleArch();
+    std::string Arch = Device.getArch();
+
+    jit::PostProcessingFn PostProcessing =
+        [&Device](std::unique_ptr<MemoryBuffer> MB)
+        -> Expected<std::unique_ptr<MemoryBuffer>> {
+      return Device.doJITPostProcessing(std::move(MB));
+    };
+
+    if (jit::checkBitcodeImage(TgtImage, TA)) {
+      auto TgtImageOrErr =
+          jit::compile(TgtImage, TA, Arch, JITOptLevel, PostProcessing);
+      if (!TgtImageOrErr) {
+        auto Err = TgtImageOrErr.takeError();
+        REPORT("Failure to jit binary image from bitcode image %p on device "
+               "%d: %s\n",
+               TgtImage, DeviceId, toString(std::move(Err)).data());
+        return nullptr;
+      }
+
+      TgtImage = *TgtImageOrErr;
+    }
+  }
+
+  auto TableOrErr = Device.loadBinary(Plugin, TgtImage);
   if (!TableOrErr) {
     auto Err = TableOrErr.takeError();
     REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage,
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -278,6 +278,14 @@
                                  GridValues.GV_Warp_Size))
       return Err;

+    if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                                 ComputeCapability.Major))
+      return Err;
+
+    if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+                                 ComputeCapability.Minor))
+      return Err;
+
     return Plugin::success();
   }

@@ -794,6 +802,9 @@
     return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
   }

+  /// See GenericDeviceTy::getArch().
+  std::string getArch() const override { return ComputeCapability.str(); }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;
@@ -810,6 +821,15 @@

   /// The CUDA device handler.
   CUdevice Device = CU_DEVICE_INVALID;
+
+  /// The compute capability of the corresponding CUDA device.
+  struct ComputeCapabilityTy {
+    uint32_t Major;
+    uint32_t Minor;
+    std::string str() const {
+      return "sm_" + std::to_string(Major * 10 + Minor);
+    }
+  } ComputeCapability;
 };

 Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
@@ -908,6 +928,11 @@
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }

+  Triple::ArchType getTripleArch() const override {
+    // TODO: I think we can drop the support for 32-bit NVPTX devices.
+    return Triple::nvptx64;
+  }
+
   /// Check whether the image is compatible with the available CUDA devices.
   Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
     for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
diff --git a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
--- a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -364,6 +364,10 @@
   Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
     return true;
   }
+
+  Triple::ArchType getTripleArch() const override {
+    return Triple::LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
+  }
 };

 GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }
diff --git a/openmp/libomptarget/test/lit.cfg b/openmp/libomptarget/test/lit.cfg
--- a/openmp/libomptarget/test/lit.cfg
+++ b/openmp/libomptarget/test/lit.cfg
@@ -34,6 +34,15 @@
     else:
         config.environment[name] = value

+# Evaluate an environment variable that holds a boolean-like string value.
+def evaluate_bool_env(env):
+    env = env.lower()
+    possible_true_values = ["on", "true", "1"]
+    for v in possible_true_values:
+        if env == v:
+            return True
+    return False
+
 # name: The name of this test suite.
 config.name = 'libomptarget :: ' + config.libomptarget_current_target

@@ -111,10 +120,17 @@
         config.test_flags += " --libomptarget-nvptx-bc-path=" + config.library_dir
     if config.libomptarget_current_target.endswith('-LTO'):
         config.test_flags += " -foffload-lto"
+    if config.libomptarget_current_target.endswith('-JIT-LTO') and evaluate_bool_env(
+        config.environment['LIBOMPTARGET_NEXTGEN_PLUGINS']
+    ):
+        config.test_flags += " -foffload-lto"
+        config.test_flags += " -Wl,--embed-bitcode"

 def remove_suffix_if_present(name):
-    if name.endswith('-LTO'):
+    if name.endswith('-JIT-LTO'):
+        return name[:-8]
+    elif name.endswith('-LTO'):
         return name[:-4]
     else:
         return name
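Usage sketch (not part of the patch): with the changes above, a fat binary whose device image is embedded LLVM bitcode is accepted by __tgt_rtl_is_valid_binary and JIT compiled in __tgt_rtl_load_binary for the architecture reported by getArch() (e.g. sm_70 on CUDA), honoring the LIBOMPTARGET_JIT_OPT_LEVEL environment variable (default 3). The example below is illustrative only: the file name is hypothetical, the compile flags mirror the new -JIT-LTO lit configuration (-foffload-lto -Wl,--embed-bitcode), and it assumes libomptarget was built with the nextgen plugins enabled (LIBOMPTARGET_NEXTGEN_PLUGINS).

// jit_example.cpp - hypothetical test program, not part of this patch.
//
// Possible build/run commands (flags taken from the -JIT-LTO lit config above):
//   clang++ -fopenmp --offload-arch=sm_70 -foffload-lto -Wl,--embed-bitcode \
//           jit_example.cpp -o jit_example
//   LIBOMPTARGET_JIT_OPT_LEVEL=2 ./jit_example
#include <cstdio>

int main() {
  const int N = 1 << 20;
  float *X = new float[N];
  float *Y = new float[N];
  for (int I = 0; I < N; ++I) {
    X[I] = 1.0f;
    Y[I] = 2.0f;
  }

  // The device image for this region is carried as bitcode and JIT compiled
  // by the nextgen plugin when the image is loaded onto the device.
#pragma omp target teams distribute parallel for map(to : X[0 : N]) map(tofrom : Y[0 : N])
  for (int I = 0; I < N; ++I)
    Y[I] += 2.0f * X[I];

  printf("Y[0] = %f\n", Y[0]); // Expected: 4.000000
  delete[] X;
  delete[] Y;
  return 0;
}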