diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt
--- a/openmp/libomptarget/CMakeLists.txt
+++ b/openmp/libomptarget/CMakeLists.txt
@@ -50,6 +50,7 @@
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-pc-linux-gnu-LTO")
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda")
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-LTO")
+set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-JIT-LTO")

 # Once the plugins for the different targets are validated, they will be added to
 # the list of supported targets in the current system.
@@ -73,7 +74,7 @@
 # Follow host OMPT support and check if host support has been requested.
 # LIBOMP_HAVE_OMPT_SUPPORT indicates whether host OMPT support has been implemented.
 # LIBOMP_OMPT_SUPPORT indicates whether host OMPT support has been requested (default is ON).
-# LIBOMPTARGET_OMPT_SUPPORT indicates whether target OMPT support has been requested (default is ON).
+# LIBOMPTARGET_OMPT_SUPPORT indicates whether target OMPT support has been requested (default is ON).
 set(OMPT_TARGET_DEFAULT FALSE)
 if ((LIBOMP_HAVE_OMPT_SUPPORT) AND (LIBOMP_OMPT_SUPPORT) AND (NOT WIN32))
   set (OMPT_TARGET_DEFAULT TRUE)
diff --git a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
--- a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
@@ -33,6 +33,9 @@
     # Define macro with the ELF ID for this target.
     add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")

+    # Define target triple
+    add_definitions("-DLIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE=${tmachine}")
+
     add_llvm_library("omptarget.rtl.${tmachine_libname}.nextgen"
       SHARED
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
@@ -12,7 +12,14 @@
 # NOTE: Don't try to build `PluginInterface` using `add_llvm_library` because we
 # don't want to export `PluginInterface` while `add_llvm_library` requires that.
-add_library(PluginInterface OBJECT PluginInterface.cpp GlobalHandler.cpp)
+add_library(PluginInterface OBJECT
+  PluginInterface.cpp GlobalHandler.cpp JIT.cpp)
+
+# Only enable JIT for those targets that LLVM can support.
+string(TOUPPER "${LLVM_TARGETS_TO_BUILD}" TargetsSupported)
+foreach(Target ${TargetsSupported})
+  target_compile_definitions(PluginInterface PRIVATE "LIBOMPTARGET_JIT_${Target}")
+endforeach()

 # This is required when using LLVM libraries.
 llvm_update_compile_flags(PluginInterface)
@@ -20,7 +27,31 @@
 if (LLVM_LINK_LLVM_DYLIB)
   set(llvm_libs LLVM)
 else()
-  llvm_map_components_to_libnames(llvm_libs Support)
+  llvm_map_components_to_libnames(llvm_libs
+    ${LLVM_TARGETS_TO_BUILD}
+    AggressiveInstCombine
+    Analysis
+    BinaryFormat
+    BitReader
+    BitWriter
+    CodeGen
+    Core
+    Extensions
+    InstCombine
+    Instrumentation
+    IPO
+    IRReader
+    Linker
+    MC
+    Object
+    Passes
+    Remarks
+    ScalarOpts
+    Support
+    Target
+    TransformUtils
+    Vectorize
+    )
 endif()

 target_link_libraries(PluginInterface
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
@@ -0,0 +1,50 @@
+//===- JIT.h - Target independent JIT infrastructure ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
+#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Error.h"
+
+#include <functional>
+#include <memory>
+#include <string>
+
+struct __tgt_device_image;
+
+namespace llvm {
+class MemoryBuffer;
+
+namespace omp {
+namespace jit {
+
+/// Function type for a callback that will be called after the backend is
+/// called.
+using PostProcessingFn = std::function<Expected<std::unique_ptr<MemoryBuffer>>(
+    std::unique_ptr<MemoryBuffer>)>;
+
+/// Check if \p Image contains bitcode with triple \p Triple.
+bool checkBitcodeImage(__tgt_device_image *Image, Triple::ArchType TA);
+
+/// Compile the bitcode image \p Image and generate the binary image that can
+/// be loaded to the target device with triple \p Triple and architecture \p
+/// MCpu. \p PostProcessing will be called after codegen to handle cases such
+/// as running an external assembler.
+Expected<__tgt_device_image *> compile(__tgt_device_image *Image,
+                                       Triple::ArchType TA, std::string MCpu,
+                                       unsigned OptLevel,
+                                       PostProcessingFn PostProcessing);
+} // namespace jit
+} // namespace omp
+} // namespace llvm
+
+#endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
@@ -0,0 +1,375 @@
+//===- JIT.cpp - Target independent JIT infrastructure --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "Debug.h"
+
+#include "omptarget.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LLVMRemarkStreamer.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include <mutex>
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace omp;
+
+static codegen::RegisterCodeGenFlags RCGF;
+
+namespace {
+std::once_flag InitFlag;
+
+void init(Triple TT) {
+  bool JITTargetInitialized = false;
+#ifdef LIBOMPTARGET_JIT_NVPTX
+  if (TT.isNVPTX()) {
+    LLVMInitializeNVPTXTargetInfo();
+    LLVMInitializeNVPTXTarget();
+    LLVMInitializeNVPTXTargetMC();
+    LLVMInitializeNVPTXAsmPrinter();
+    JITTargetInitialized = true;
+  }
+#endif
+#ifdef LIBOMPTARGET_JIT_AMDGPU
+  if (TT.isAMDGPU()) {
+    LLVMInitializeAMDGPUTargetInfo();
+    LLVMInitializeAMDGPUTarget();
+    LLVMInitializeAMDGPUTargetMC();
+    LLVMInitializeAMDGPUAsmPrinter();
+    JITTargetInitialized = true;
+  }
+#endif
+  if (!JITTargetInitialized) {
+    FAILURE_MESSAGE("unsupported JIT target");
+    abort();
+  }
+
+  // Initialize passes
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeCore(Registry);
+  initializeScalarOpts(Registry);
+  initializeVectorization(Registry);
+  initializeIPO(Registry);
+  initializeAnalysis(Registry);
+  initializeTransformUtils(Registry);
+  initializeInstCombine(Registry);
+  initializeTarget(Registry);
+
+  initializeExpandLargeDivRemLegacyPassPass(Registry);
+  initializeExpandLargeFpConvertLegacyPassPass(Registry);
+  initializeExpandMemCmpPassPass(Registry);
+  initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
+  initializeSelectOptimizePass(Registry);
+  initializeCodeGenPreparePass(Registry);
+  initializeAtomicExpandPass(Registry);
+  initializeRewriteSymbolsLegacyPassPass(Registry);
+  initializeWinEHPreparePass(Registry);
+  initializeDwarfEHPrepareLegacyPassPass(Registry);
+  initializeSafeStackLegacyPassPass(Registry);
+  initializeSjLjEHPreparePass(Registry);
+  initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
+  initializeGlobalMergePass(Registry);
+  initializeIndirectBrExpandPassPass(Registry);
+  initializeInterleavedLoadCombinePass(Registry);
+  initializeInterleavedAccessPass(Registry);
+  initializeUnreachableBlockElimLegacyPassPass(Registry);
+  initializeExpandReductionsPass(Registry);
+  initializeExpandVectorPredicationPass(Registry);
+  initializeWasmEHPreparePass(Registry);
+  initializeWriteBitcodePassPass(Registry);
+  initializeHardwareLoopsPass(Registry);
+  initializeTypePromotionPass(Registry);
+  initializeReplaceWithVeclibLegacyPass(Registry);
+  initializeJMCInstrumenterPass(Registry);
+}
+
+Expected<std::unique_ptr<Module>>
+createModuleFromImage(__tgt_device_image *Image, LLVMContext &Context) {
+  StringRef Data((const char *)Image->ImageStart,
+                 (char *)Image->ImageEnd - (char *)Image->ImageStart);
+  std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
+      Data, /* BufferName */ "", /* RequiresNullTerminator */ false);
+  SMDiagnostic Err;
+  auto Mod = parseIR(*MB, Err, Context);
+  if (!Mod)
+    return make_error<StringError>("Failed to create module",
+                                   inconvertibleErrorCode());
+  return Mod;
+}
+
+CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) {
+  switch (OptLevel) {
+  case 0:
+    return CodeGenOpt::None;
+  case 1:
+    return CodeGenOpt::Less;
+  case 2:
+    return CodeGenOpt::Default;
+  case 3:
+    return CodeGenOpt::Aggressive;
+  }
+  llvm_unreachable("Invalid optimization level");
+}
+
+OptimizationLevel getOptLevel(unsigned OptLevel) {
+  switch (OptLevel) {
+  case 0:
+    return OptimizationLevel::O0;
+  case 1:
+    return OptimizationLevel::O1;
+  case 2:
+    return OptimizationLevel::O2;
+  case 3:
+    return OptimizationLevel::O3;
+  }
+  llvm_unreachable("Invalid optimization level");
+}
+
+Expected<std::unique_ptr<TargetMachine>>
+createTargetMachine(Module &M, std::string CPU, unsigned OptLevel) {
+  Triple TT(M.getTargetTriple());
+  CodeGenOpt::Level CGOptLevel = getCGOptLevel(OptLevel);
+
+  std::string Msg;
+  const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg);
+  if (!T)
+    return make_error<StringError>(Msg, inconvertibleErrorCode());
+
+  SubtargetFeatures Features;
+  Features.getDefaultSubtargetFeatures(TT);
+
+  std::optional<Reloc::Model> RelocModel;
+  if (M.getModuleFlag("PIC Level"))
+    RelocModel =
+        M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
+
+  std::optional<CodeModel::Model> CodeModel = M.getCodeModel();
+
+  TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(TT);
+
+  std::unique_ptr<TargetMachine> TM(
+      T->createTargetMachine(M.getTargetTriple(), CPU, Features.getString(),
+                             Options, RelocModel, CodeModel, CGOptLevel));
+  if (!TM)
+    return make_error<StringError>("Failed to create target machine",
+                                   inconvertibleErrorCode());
+  return TM;
+}
+
+/// A JIT engine that compiles a bitcode device image for a single target.
+class JITEngine {
+public:
+  JITEngine(Triple::ArchType TA, std::string MCpu)
+      : TT(Triple::getArchTypeName(TA)), CPU(MCpu) {
+    std::call_once(InitFlag, init, TT);
+  }
+
+  /// Run JIT compilation. It is expected to return a memory buffer containing
+  /// the generated device image that can be loaded to the device directly.
+  Expected<std::unique_ptr<MemoryBuffer>>
+  run(__tgt_device_image *Image, unsigned OptLevel,
+      jit::PostProcessingFn PostProcessing);
+
+private:
+  /// Run the backend, which contains optimization and code generation.
+  Expected<std::unique_ptr<MemoryBuffer>> backend(Module &M, unsigned OptLevel);
+
+  /// Run the optimization pipeline.
+  void opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
+           unsigned OptLevel);
+
+  /// Run code generation.
+  void codegen(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
+               raw_pwrite_stream &OS);
+
+  LLVMContext Context;
+  const Triple TT;
+  const std::string CPU;
+};
+
+void JITEngine::opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
+                    unsigned OptLevel) {
+  PipelineTuningOptions PTO;
+  std::optional<PGOOptions> PGOOpt;
+
+  LoopAnalysisManager LAM;
+  FunctionAnalysisManager FAM;
+  CGSCCAnalysisManager CGAM;
+  ModuleAnalysisManager MAM;
+  ModulePassManager MPM;
+
+  PassBuilder PB(TM, PTO, PGOOpt, nullptr);
+
+  FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
+  // Register all the basic analyses with the managers.
+  PB.registerModuleAnalyses(MAM);
+  PB.registerCGSCCAnalyses(CGAM);
+  PB.registerFunctionAnalyses(FAM);
+  PB.registerLoopAnalyses(LAM);
+  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+
+  MPM.addPass(PB.buildPerModuleDefaultPipeline(getOptLevel(OptLevel)));
+
+  MPM.run(M, MAM);
+}
+
+void JITEngine::codegen(TargetMachine *TM, TargetLibraryInfoImpl *TLII,
+                        Module &M, raw_pwrite_stream &OS) {
+  legacy::PassManager PM;
+  PM.add(new TargetLibraryInfoWrapperPass(*TLII));
+  MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(
+      reinterpret_cast<LLVMTargetMachine *>(TM));
+  TM->addPassesToEmitFile(PM, OS, nullptr,
+                          TT.isNVPTX() ? CGFT_AssemblyFile : CGFT_ObjectFile,
+                          /* DisableVerify */ false, MMIWP);
+
+  PM.run(M);
+}
+
+Expected<std::unique_ptr<MemoryBuffer>> JITEngine::backend(Module &M,
+                                                           unsigned OptLevel) {
+
+  auto RemarksFileOrErr = setupLLVMOptimizationRemarks(
+      Context, /* RemarksFilename */ "", /* RemarksPasses */ "",
+      /* RemarksFormat */ "", /* RemarksWithHotness */ false);
+  if (Error E = RemarksFileOrErr.takeError())
+    return std::move(E);
+  if (*RemarksFileOrErr)
+    (*RemarksFileOrErr)->keep();
+
+  auto TMOrErr = createTargetMachine(M, CPU, OptLevel);
+  if (!TMOrErr)
+    return TMOrErr.takeError();
+
+  std::unique_ptr<TargetMachine> TM = std::move(*TMOrErr);
+  TargetLibraryInfoImpl TLII(TT);
+
+  opt(TM.get(), &TLII, M, OptLevel);
+
+  // Prepare the output buffer and stream for codegen.
+  SmallVector<char> CGOutputBuffer;
+  raw_svector_ostream OS(CGOutputBuffer);
+
+  codegen(TM.get(), &TLII, M, OS);
+
+  return MemoryBuffer::getMemBufferCopy(OS.str());
+}
+
+Expected<std::unique_ptr<MemoryBuffer>>
+JITEngine::run(__tgt_device_image *Image, unsigned OptLevel,
+               jit::PostProcessingFn PostProcessing) {
+  auto ModOrErr = createModuleFromImage(Image, Context);
+  if (!ModOrErr)
+    return ModOrErr.takeError();
+
+  auto Mod = std::move(*ModOrErr);
+
+  auto MBOrError = backend(*Mod, OptLevel);
+  if (!MBOrError)
+    return MBOrError.takeError();
+
+  return PostProcessing(std::move(*MBOrError));
+}
+
+/// A map from a bitcode image start address to its corresponding triple. If
+/// the image is not in the map, it is not a bitcode image.
+DenseMap<void *, Triple::ArchType> BitcodeImageMap;
+
+/// Output images generated from the LLVM backend.
+SmallVector<std::unique_ptr<MemoryBuffer>, 4> JITImages;
+
+/// A list of __tgt_device_image images.
+std::list<__tgt_device_image> TgtImages;
+} // namespace
+
+namespace llvm {
+namespace omp {
+namespace jit {
+bool checkBitcodeImage(__tgt_device_image *Image, Triple::ArchType TA) {
+  TimeTraceScope TimeScope("Check bitcode image");
+
+  {
+    auto Itr = BitcodeImageMap.find(Image->ImageStart);
+    if (Itr != BitcodeImageMap.end() && Itr->second == TA)
+      return true;
+  }
+
+  StringRef Data(reinterpret_cast<const char *>(Image->ImageStart),
+                 reinterpret_cast<const char *>(Image->ImageEnd) -
+                     reinterpret_cast<const char *>(Image->ImageStart));
+  std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
+      Data, /* BufferName */ "", /* RequiresNullTerminator */ false);
+  if (!MB)
+    return false;
+
+  Expected<IRSymtabFile> FOrErr = object::readIRSymtab(*MB);
+  if (!FOrErr) {
+    consumeError(FOrErr.takeError());
+    return false;
+  }
+
+  auto ActualTriple = FOrErr->TheReader.getTargetTriple();
+
+  if (Triple(ActualTriple).getArch() == TA) {
+    BitcodeImageMap[Image->ImageStart] = TA;
+    return true;
+  }
+
+  return false;
+}
+
+Expected<__tgt_device_image *> compile(__tgt_device_image *Image,
+                                       Triple::ArchType TA, std::string MCPU,
+                                       unsigned OptLevel,
+                                       PostProcessingFn PostProcessing) {
+  JITEngine J(TA, MCPU);
+
+  auto ImageMBOrErr = J.run(Image, OptLevel, PostProcessing);
+  if (!ImageMBOrErr)
+    return ImageMBOrErr.takeError();
+
+  JITImages.push_back(std::move(*ImageMBOrErr));
+  TgtImages.push_back(*Image);
+
+  auto &ImageMB = JITImages.back();
+  auto *NewImage = &TgtImages.back();
+
+  NewImage->ImageStart = (void *)ImageMB->getBufferStart();
+  NewImage->ImageEnd = (void *)ImageMB->getBufferEnd();
+
+  return NewImage;
+}
+
+} // namespace jit
+} // namespace omp
+} // namespace llvm
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -26,6 +26,7 @@
 #include "omptarget.h"

 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/Support/Allocator.h"
@@ -377,6 +378,17 @@
   }
   uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }

+  /// Get the target architecture.
+  virtual std::string getArch() const {
+    llvm_unreachable("device doesn't support JIT");
+  }
+
+  /// Post processing after JIT backend. The ownership of \p MB will be taken.
+  virtual Expected<std::unique_ptr<MemoryBuffer>>
+  doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const {
+    return MB;
+  }
+
 private:
   /// Register offload entry for global variable.
   Error registerGlobalOffloadEntry(DeviceImageTy &DeviceImage,
@@ -526,6 +538,11 @@
   /// Get the ELF code to recognize the binary image of this plugin.
   virtual uint16_t getMagicElfBits() const = 0;

+  /// Get the target triple of this plugin.
+  virtual Triple::ArchType getTripleArch() const {
+    llvm_unreachable("target doesn't support JIT");
+  }
+
   /// Allocate a structure using the internal allocator.
   template <typename Ty> Ty *allocate() {
     return reinterpret_cast<Ty *>(Allocator.Allocate(sizeof(Ty), alignof(Ty)));
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -11,6 +11,7 @@
 #include "PluginInterface.h"
 #include "Debug.h"
 #include "GlobalHandler.h"
+#include "JIT.h"
 #include "elf_common.h"
 #include "omptarget.h"
 #include "omptargetplugin.h"
@@ -629,7 +630,10 @@
   if (!Plugin::isActive())
     return false;

-  return elf_check_machine(TgtImage, Plugin::get().getMagicElfBits());
+  if (elf_check_machine(TgtImage, Plugin::get().getMagicElfBits()))
+    return true;
+
+  return jit::checkBitcodeImage(TgtImage, Plugin::get().getTripleArch());
 }

 int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *TgtImage,
@@ -700,7 +704,37 @@
 __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
                                           __tgt_device_image *TgtImage) {
   GenericPluginTy &Plugin = Plugin::get();
-  auto TableOrErr = Plugin.getDevice(DeviceId).loadBinary(Plugin, TgtImage);
+  GenericDeviceTy &Device = Plugin.getDevice(DeviceId);
+
+  // If it is a bitcode image, we have to JIT compile it before loading it to
+  // the device.
+  {
+    UInt32Envar JITOptLevel("LIBOMPTARGET_JIT_OPT_LEVEL", 3);
+    Triple::ArchType TA = Plugin.getTripleArch();
+    std::string Arch = Device.getArch();
+
+    jit::PostProcessingFn PostProcessing =
+        [&Device](std::unique_ptr<MemoryBuffer> MB)
+        -> Expected<std::unique_ptr<MemoryBuffer>> {
+      return Device.doJITPostProcessing(std::move(MB));
+    };
+
+    if (jit::checkBitcodeImage(TgtImage, TA)) {
+      auto TgtImageOrErr =
+          jit::compile(TgtImage, TA, Arch, JITOptLevel, PostProcessing);
+      if (!TgtImageOrErr) {
+        auto Err = TgtImageOrErr.takeError();
+        REPORT("Failure to jit binary image from bitcode image %p on device "
+               "%d: %s\n",
+               TgtImage, DeviceId, toString(std::move(Err)).data());
+        return nullptr;
+      }
+
+      TgtImage = *TgtImageOrErr;
+    }
+  }
+
+  auto TableOrErr = Device.loadBinary(Plugin, TgtImage);
   if (!TableOrErr) {
     auto Err = TableOrErr.takeError();
     REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage,
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -278,6 +278,14 @@
                                  GridValues.GV_Warp_Size))
       return Err;

+    if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                                 ComputeCapability.Major))
+      return Err;
+
+    if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+                                 ComputeCapability.Minor))
+      return Err;
+
     return Plugin::success();
   }

@@ -794,6 +802,9 @@
     return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
   }

+  /// See GenericDeviceTy::getArch().
+  std::string getArch() const override { return ComputeCapability.str(); }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;
@@ -810,6 +821,15 @@

   /// The CUDA device handler.
   CUdevice Device = CU_DEVICE_INVALID;
+
+  /// The compute capability of the corresponding CUDA device.
+  struct ComputeCapabilityTy {
+    uint32_t Major;
+    uint32_t Minor;
+    std::string str() const {
+      return "sm_" + std::to_string(Major * 10 + Minor);
+    }
+  } ComputeCapability;
 };

 Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
@@ -908,6 +928,11 @@
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }

+  Triple::ArchType getTripleArch() const override {
+    // TODO: I think we can drop the support for 32-bit NVPTX devices.
+    return Triple::nvptx64;
+  }
+
   /// Check whether the image is compatible with the available CUDA devices.
   Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
     for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
diff --git a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
--- a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -364,6 +364,10 @@
   Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
     return true;
   }
+
+  Triple::ArchType getTripleArch() const override {
+    return Triple::LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
+  }
 };

 GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }
diff --git a/openmp/libomptarget/test/lit.cfg b/openmp/libomptarget/test/lit.cfg
--- a/openmp/libomptarget/test/lit.cfg
+++ b/openmp/libomptarget/test/lit.cfg
@@ -34,6 +34,15 @@
     else:
         config.environment[name] = value

+# Evaluate an environment variable that holds a boolean-like string value.
+def evaluate_bool_env(env):
+    env = env.lower()
+    possible_true_values = ["on", "true", "1"]
+    for v in possible_true_values:
+        if env == v:
+            return True
+    return False
+
 # name: The name of this test suite.
 config.name = 'libomptarget :: ' + config.libomptarget_current_target

@@ -111,10 +120,17 @@
         config.test_flags += " --libomptarget-nvptx-bc-path=" + config.library_dir
     if config.libomptarget_current_target.endswith('-LTO'):
         config.test_flags += " -foffload-lto"
+    if config.libomptarget_current_target.endswith('-JIT-LTO') and evaluate_bool_env(
+        config.environment['LIBOMPTARGET_NEXTGEN_PLUGINS']
+    ):
+        config.test_flags += " -foffload-lto"
+        config.test_flags += " -Wl,--embed-bitcode"

 def remove_suffix_if_present(name):
-    if name.endswith('-LTO'):
+    if name.endswith('-JIT-LTO'):
+        return name[:-8]
+    elif name.endswith('-LTO'):
         return name[:-4]
     else:
         return name
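Usage sketch (not part of the patch): with the changes above, a fat binary whose device image is embedded LLVM bitcode is accepted by __tgt_rtl_is_valid_binary and JIT compiled in __tgt_rtl_load_binary for the architecture reported by getArch() (e.g. sm_70 on CUDA), honoring the LIBOMPTARGET_JIT_OPT_LEVEL environment variable (default 3). The example below is illustrative only: the file name is hypothetical, the compile flags mirror the new -JIT-LTO lit configuration (-foffload-lto -Wl,--embed-bitcode), and it assumes libomptarget was built with the nextgen plugins enabled (LIBOMPTARGET_NEXTGEN_PLUGINS).

// jit_example.cpp - hypothetical test program, not part of this patch.
//
// Possible build/run commands (flags taken from the -JIT-LTO lit config above):
//   clang++ -fopenmp --offload-arch=sm_70 -foffload-lto -Wl,--embed-bitcode \
//           jit_example.cpp -o jit_example
//   LIBOMPTARGET_JIT_OPT_LEVEL=2 ./jit_example
#include <cstdio>

int main() {
  const int N = 1 << 20;
  float *X = new float[N];
  float *Y = new float[N];
  for (int I = 0; I < N; ++I) {
    X[I] = 1.0f;
    Y[I] = 2.0f;
  }

  // The device image for this region is carried as bitcode and JIT compiled
  // by the nextgen plugin when the image is loaded onto the device.
#pragma omp target teams distribute parallel for map(to : X[0 : N]) map(tofrom : Y[0 : N])
  for (int I = 0; I < N; ++I)
    Y[I] += 2.0f * X[I];

  printf("Y[0] = %f\n", Y[0]); // Expected: 4.000000
  delete[] X;
  delete[] Y;
  return 0;
}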