diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -713,6 +713,10 @@ * ``LIBOMPTARGET_STACK_SIZE=`` * ``LIBOMPTARGET_SHARED_MEMORY_SIZE=`` * ``LIBOMPTARGET_MAP_FORCE_ATOMIC=[TRUE/FALSE] (default TRUE)`` + * ``LIBOMPTARGET_JIT_OPT_LEVEL={0,1,2,3} (default 3)`` + * ``LIBOMPTARGET_JIT_REPLACEMENT_MODULE= (LLVM-IR file)`` + * ``LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE= (LLVM-IR file)`` + * ``LIBOMPTARGET_JIT_POST_OPT_IR_MODULE= (LLVM-IR file)`` LIBOMPTARGET_DEBUG """""""""""""""""" @@ -1050,6 +1054,52 @@ The default behavior of LLVM 14 is to force atomic maps clauses, prior versions of LLVM did not. + +LIBOMPTARGET_JIT_OPT_LEVEL +"""""""""""""""""""""""""" + +This environment variable can be used to change the optimization pipeline used +to optimize the embedded device code as part of the device JIT. The value +corresponds to the ``-O{0,1,2,3}`` command line argument passed to ``clang``. + + +LIBOMPTARGET_JIT_REPLACEMENT_MODULE +""""""""""""""""""""""""""""""""""" + +This environment variable can be used to replace the embedded device code +before the device JIT finishes compilation for the target. The value is +expected to be a filename to an LLVM-IR file, thus containing an LLVM-IR module +for the respective target. To obtain a device code image compatible with the +embedded one it is recommended to extract the embedded one either before or +after IR optimization. This can be done at compile time, after compile time via +llvm tools (llvm-objdump), or, simply, by setting the +:ref:`LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE` or +:ref:`LIBOMPTARGET_JIT_POST_OPT_IR_MODULE` environment variables. + + +LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE +"""""""""""""""""""""""""""""""""" + +This environment variable can be used to extract the embedded device code +before the device JIT runs additional IR optimizations on it (see +:ref:`LIBOMPTARGET_JIT_OPT_LEVEL`). 
The value is expected to be a filename into +which the LLVM-IR module is written. The module can then be analyzed, +transformed, and loaded back into the JIT pipeline via +:ref:`LIBOMPTARGET_JIT_REPLACEMENT_MODULE`. + + +LIBOMPTARGET_JIT_POST_OPT_IR_MODULE +""""""""""""""""""""""""""""""""""" + +This environment variable can be used to extract the embedded device code after +the device JIT runs additional IR optimizations on it (see +:ref:`LIBOMPTARGET_JIT_OPT_LEVEL`). The value is expected to be a filename into +which the LLVM-IR module is written. The module can then be analyzed, +transformed, and loaded back into the JIT pipeline via +:ref:`LIBOMPTARGET_JIT_REPLACEMENT_MODULE`. + + + .. _libomptarget_plugin: LLVM/OpenMP Target Host Runtime Plugins (``libomptarget.rtl.XXXX``) diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp @@ -11,6 +11,7 @@ #include "JIT.h" #include "Debug.h" +#include "Utilities.h" #include "omptarget.h" #include "llvm/ADT/SmallVector.h" @@ -38,6 +39,7 @@ #include "llvm/Target/TargetOptions.h" #include +#include using namespace llvm; using namespace llvm::object; @@ -113,11 +115,8 @@ } Expected> -createModuleFromImage(__tgt_device_image *Image, LLVMContext &Context) { - StringRef Data((const char *)Image->ImageStart, - (char *)Image->ImageEnd - (char *)Image->ImageStart); - std::unique_ptr MB = MemoryBuffer::getMemBuffer( - Data, /* BufferName */ "", /* RequiresNullTerminator */ false); +createModuleFromMemoryBuffer(std::unique_ptr &MB, + LLVMContext &Context) { SMDiagnostic Err; auto Mod = parseIR(*MB, Err, Context); if (!Mod) @@ -125,6 +124,14 @@ inconvertibleErrorCode()); return Mod; } +Expected> +createModuleFromImage(__tgt_device_image *Image, LLVMContext &Context) { + StringRef Data((const char 
*)Image->ImageStart, + (char *)Image->ImageEnd - (char *)Image->ImageStart); + std::unique_ptr MB = MemoryBuffer::getMemBuffer( + Data, /* BufferName */ "", /* RequiresNullTerminator */ false); + return createModuleFromMemoryBuffer(MB, Context); +} CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) { switch (OptLevel) { @@ -189,7 +196,10 @@ class JITEngine { public: JITEngine(Triple::ArchType TA, std::string MCpu) - : TT(Triple::getArchTypeName(TA)), CPU(MCpu) { + : TT(Triple::getArchTypeName(TA)), CPU(MCpu), + ReplacementModuleFileName("LIBOMPTARGET_JIT_REPLACEMENT_MODULE"), + PreOptIRModuleFileName("LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE"), + PostOptIRModuleFileName("LIBOMPTARGET_JIT_POST_OPT_IR_MODULE") { std::call_once(InitFlag, init, TT); } @@ -214,6 +224,11 @@ LLVMContext Context; const Triple TT; const std::string CPU; + + /// Control environment variables. + target::StringEnvar ReplacementModuleFileName; + target::StringEnvar PreOptIRModuleFileName; + target::StringEnvar PostOptIRModuleFileName; }; void JITEngine::opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M, @@ -277,8 +292,28 @@ std::unique_ptr TM = std::move(*TMOrErr); TargetLibraryInfoImpl TLII(TT); + if (PreOptIRModuleFileName.isPresent()) { + std::error_code EC; + raw_fd_stream FD(PreOptIRModuleFileName.get(), EC); + if (EC) + return createStringError( + EC, "Could not open %s to write the pre-opt IR module\n", + PreOptIRModuleFileName.get().c_str()); + M.print(FD, nullptr); + } + opt(TM.get(), &TLII, M, OptLevel); + if (PostOptIRModuleFileName.isPresent()) { + std::error_code EC; + raw_fd_stream FD(PostOptIRModuleFileName.get(), EC); + if (EC) + return createStringError( + EC, "Could not open %s to write the post-opt IR module\n", + PostOptIRModuleFileName.get().c_str()); + M.print(FD, nullptr); + } + // Prepare the output buffer and stream for codegen. 
SmallVector CGOutputBuffer; raw_svector_ostream OS(CGOutputBuffer); @@ -291,11 +326,26 @@ Expected> JITEngine::run(__tgt_device_image *Image, unsigned OptLevel, jit::PostProcessingFn PostProcessing) { - auto ModOrErr = createModuleFromImage(Image, Context); - if (!ModOrErr) - return ModOrErr.takeError(); - - auto Mod = std::move(*ModOrErr); + std::unique_ptr<Module> Mod; + // Use the user-provided replacement module if set, else parse the image; + // the unique_ptr owns the module so no return path can leak it. + if (!ReplacementModuleFileName.isPresent()) { + auto ModOrErr = createModuleFromImage(Image, Context); + if (!ModOrErr) + return ModOrErr.takeError(); + Mod = std::move(*ModOrErr); + } else { + auto MBOrErr = + MemoryBuffer::getFileOrSTDIN(ReplacementModuleFileName.get()); + if (!MBOrErr) + return createStringError(MBOrErr.getError(), + "Could not read replacement module from %s\n", + ReplacementModuleFileName.get().c_str()); + auto ModOrErr = createModuleFromMemoryBuffer(MBOrErr.get(), Context); + if (!ModOrErr) + return ModOrErr.takeError(); + Mod = std::move(*ModOrErr); + } auto MBOrError = backend(*Mod, OptLevel); if (!MBOrError) diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -740,6 +740,7 @@ // If it is a bitcode image, we have to jit the binary image before loading to // the device. { + // TODO: Move this (at least the environment variable) into the JIT.h. UInt32Envar JITOptLevel("LIBOMPTARGET_JIT_OPT_LEVEL", 3); Triple::ArchType TA = Plugin.getTripleArch(); std::string Arch = Device.getArch();