diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt --- a/mlir/CMakeLists.txt +++ b/mlir/CMakeLists.txt @@ -97,7 +97,7 @@ # Build the CUDA conversions and run according tests if the NVPTX backend # is available -if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD AND MLIR_ENABLE_EXECUTION_ENGINE) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) set(MLIR_ENABLE_CUDA_CONVERSIONS 1) else() set(MLIR_ENABLE_CUDA_CONVERSIONS 0) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h @@ -15,6 +15,7 @@ #define MLIR_DIALECT_LLVMIR_NVVMDIALECT_H_ #include "mlir/Bytecode/BytecodeOpInterface.h" +#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -16,6 +16,7 @@ include "mlir/IR/EnumAttr.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" def LLVM_i8Ptr_global : LLVM_IntPtrBase<8, 1>; def LLVM_i8Ptr_shared : LLVM_IntPtrBase<8, 3>; @@ -1472,4 +1473,73 @@ }]; } +//===----------------------------------------------------------------------===// +// NVVM target attribute. +//===----------------------------------------------------------------------===// + +def NVVM_TargettAttr : + NVVM_Attr<"NVVMTarget", "target", []> { + let description = [{ + GPU target attribute for controlling compilation of NVIDIA targets. All + parameters decay into default values if not present. + + Examples: + + 1. Target with default values. + ``` + gpu.module @mymodule [#nvvm.target] attributes {...} { + ... + } + ``` + + 2. 
Target with `sm_90` chip and fast math.
+    ```
+    gpu.module @mymodule [#nvvm.target<chip = "sm_90", flags = {fast}>] {
+      ...
+    }
+    ```
+  }];
+  let parameters = (ins
+    DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O,
+    StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple,
+    StringRefParameter<"Target chip.", "\"sm_50\"">:$chip,
+    StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features,
+    OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags,
+    OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link
+  );
+  let assemblyFormat = [{
+    (`<` struct($O, $triple, $chip, $features, $flags)^ `>`)?
+  }];
+  let builders = [
+    AttrBuilder<(ins CArg<"int", "2">:$optLevel,
+                     CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple,
+                     CArg<"StringRef", "\"sm_50\"">:$chip,
+                     CArg<"StringRef", "\"+ptx60\"">:$features,
+                     CArg<"DictionaryAttr", "nullptr">:$targetFlags,
+                     CArg<"ArrayAttr", "nullptr">:$linkFiles), [{
+      return Base::get($_ctxt, optLevel, triple, chip, features, targetFlags, linkFiles);
+    }]>
+  ];
+  let skipDefaultBuilders = 1;
+  let genVerifyDecl = 1;
+  let extraClassDeclaration = [{
+    bool hasFlag(StringRef flag) const;
+    bool hasFastMath() const;
+    bool hasFtz() const;
+  }];
+  let extraClassDefinition = [{
+    bool $cppClass::hasFlag(StringRef flag) const {
+      if (DictionaryAttr flags = getFlags())
+        return flags.get(flag) != nullptr;
+      return false;
+    }
+    bool $cppClass::hasFastMath() const {
+      return hasFlag("fast");
+    }
+    bool $cppClass::hasFtz() const {
+      return hasFlag("ftz");
+    }
+  }];
+}
+
 #endif // NVVMIR_OPS
diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h
--- a/mlir/include/mlir/InitAllExtensions.h
+++ b/mlir/include/mlir/InitAllExtensions.h
@@ -15,6 +15,7 @@
 #define MLIR_INITALLEXTENSIONS_H_
 
 #include "mlir/Dialect/Func/Extensions/AllExtensions.h"
+#include "mlir/Target/LLVM/NVVM/Target.h"
 
 #include <cstdlib>
 
@@ -27,6 +28,7 @@
 /// pipelines and transformations you are using.
inline void registerAllExtensions(DialectRegistry ®istry) { func::registerAllExtensions(registry); + registerNVVMTarget(registry); } } // namespace mlir diff --git a/mlir/include/mlir/Target/LLVM/NVVM/Target.h b/mlir/include/mlir/Target/LLVM/NVVM/Target.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Target/LLVM/NVVM/Target.h @@ -0,0 +1,28 @@ +//===- Target.h - MLIR NVVM target registration -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides registration calls for attaching the NVVM target interface. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVM_NVVM_TARGET_H +#define MLIR_TARGET_LLVM_NVVM_TARGET_H + +namespace mlir { +class DialectRegistry; +class MLIRContext; +/// Registers the `TargetAttrInterface` for the `#nvvm.target` attribute in the +/// given registry. +void registerNVVMTarget(DialectRegistry ®istry); + +/// Registers the `TargetAttrInterface` for the `#nvvm.target` attribute in the +/// registry associated with the given context. +void registerNVVMTarget(MLIRContext &context); +} // namespace mlir + +#endif // MLIR_TARGET_LLVM_NVVM_TARGET_H diff --git a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h @@ -0,0 +1,76 @@ +//===- Utils.h - MLIR NVVM target compilation ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This files declares NVVM target related utility classes and functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_LLVM_NVVM_UTILS_H
+#define MLIR_TARGET_LLVM_NVVM_UTILS_H
+
+#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/Target/LLVM/ModuleToObject.h"
+
+namespace mlir {
+class DialectRegistry;
+class MLIRContext;
+namespace NVVM {
+/// Searches & returns the path CUDA toolkit path, the search order is:
+/// 1. The `CUDA_ROOT` environment variable.
+/// 2. The `CUDA_HOME` environment variable.
+/// 3. The `CUDA_PATH` environment variable.
+/// 4. The CUDA toolkit path detected by CMake.
+/// 5. Returns an empty string.
+StringRef getCUDAToolkitPath();
+
+/// Base class for all NVVM serializations from GPU modules into binary strings.
+/// By default this class serializes into LLVM bitcode.
+class SerializeGPUModuleBase : public LLVM::ModuleToObject {
+public:
+  /// Initializes the `toolkitPath` with the path in `targetOptions` or if empty
+  /// with the path in `getCUDAToolkitPath`.
+  SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target,
+                         const gpu::TargetOptions &targetOptions = {});
+
+  /// Initializes the LLVM NVPTX target by safely calling `LLVMInitializeNVPTX*`
+  /// methods if available.
+  static void init();
+
+  /// Returns the target attribute.
+  NVVMTargetAttr getTarget() const;
+
+  /// Returns the CUDA toolkit path.
+  StringRef getToolkitPath() const;
+
+  /// Returns the bitcode files to be loaded.
+  ArrayRef<std::string> getFileList() const;
+
+  /// Appends `nvvm/libdevice.bc` into `fileList`. Returns failure if the
+  /// library couldn't be found.
+  LogicalResult appendStandardLibs();
+
+  /// Loads the bitcode files in `fileList`.
+  virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
+  loadBitcodeFiles(llvm::Module &module,
+                   llvm::TargetMachine &targetMachine) override;
+
+protected:
+  /// NVVM target attribute.
+  NVVMTargetAttr target;
+
+  /// CUDA toolkit path.
+  std::string toolkitPath;
+
+  /// List of LLVM bitcode files to link to.
+  SmallVector<std::string> fileList;
+};
+} // namespace NVVM
+} // namespace mlir
+
+#endif // MLIR_TARGET_LLVM_NVVM_UTILS_H
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -721,6 +721,7 @@
   // Support unknown operations because not all NVVM operations are
   // registered.
   allowUnknownOperations();
+  declarePromisedInterface<NVVMTargetAttr, gpu::TargetAttrInterface>();
 }
 
 LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op,
@@ -759,6 +760,35 @@
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// NVVM target attribute.
+//===----------------------------------------------------------------------===//
+LogicalResult
+NVVMTargetAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+                       int optLevel, StringRef triple, StringRef chip,
+                       StringRef features, DictionaryAttr flags,
+                       ArrayAttr files) {
+  if (optLevel < 0 || optLevel > 3) {
+    emitError() << "The optimization level must be a number between 0 and 3.";
+    return failure();
+  }
+  if (triple.empty()) {
+    emitError() << "The target triple cannot be empty.";
+    return failure();
+  }
+  if (chip.empty()) {
+    emitError() << "The target chip cannot be empty.";
+    return failure();
+  }
+  if (files && !llvm::all_of(files, [](::mlir::Attribute attr) {
+        return attr && mlir::isa<StringAttr>(attr);
+      })) {
+    emitError() << "All the elements in the `link` array must be strings.";
+    return failure();
+  }
+  return success();
+}
+
 #define GET_OP_CLASSES
 #include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc"
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
---
a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -18,3 +18,73 @@ MLIRExecutionEngineUtils MLIRTargetLLVMIRExport ) + +if (MLIR_ENABLE_CUDA_CONVERSIONS) + set(NVPTX_LIBS + NVPTXCodeGen + NVPTXDesc + NVPTXInfo + ) +endif() + +add_mlir_dialect_library(MLIRNVVMTarget + NVVM/Target.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/LLVMIR + + LINK_COMPONENTS + Core + MC + Target + ${NVPTX_LIBS} + + LINK_LIBS PUBLIC + MLIRIR + MLIRExecutionEngineUtils + MLIRSupport + MLIRGPUDialect + MLIRTargetLLVM + ) + +if(MLIR_ENABLE_CUDA_RUNNER) + if(NOT MLIR_ENABLE_CUDA_CONVERSIONS) + message(SEND_ERROR + "Building mlir with cuda support requires the NVPTX backend") + endif() + + # Configure CUDA language support. Using check_language first allows us to + # give a custom error message. + include(CheckLanguage) + check_language(CUDA) + if (CMAKE_CUDA_COMPILER) + enable_language(CUDA) + else() + message(SEND_ERROR + "Building mlir with cuda support requires a working CUDA install") + endif() + + # Find the CUDA toolkit. + if (NOT DEFINED CUDAToolkit_ROOT) + find_package(CUDAToolkit) + get_filename_component(CUDAToolkit_ROOT ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE) + endif() + message(VERBOSE "MLIR Default CUDA toolkit path: ${CUDAToolkit_ROOT}") + + # Enable the gpu to cubin target. + target_compile_definitions(obj.MLIRNVVMTarget + PRIVATE + MLIR_GPU_NVPTX_TARGET_ENABLED=1 + __DEFAULT_CUDATOOLKIT_PATH__="${CUDAToolkit_ROOT}" + ) + + # Add CUDA headers includes and the libcuda.so library. 
+  target_include_directories(obj.MLIRNVVMTarget
+    PRIVATE
+    ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
+  )
+  target_link_libraries(MLIRNVVMTarget
+    PRIVATE
+    ${CUDA_DRIVER_LIBRARY}
+  )
+endif()
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -0,0 +1,270 @@
+//===- NVVMTarget.h - MLIR LLVM NVVM target compilation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This files defines NVVM target related functions including registration
+// calls for the `#nvvm.target` compilation attribute.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Target/LLVM/NVVM/Target.h"
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/Target/LLVM/NVVM/Utils.h"
+#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Export.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include <cstdlib>
+
+using namespace mlir;
+using namespace mlir::NVVM;
+
+#ifndef __DEFAULT_CUDATOOLKIT_PATH__
+#define __DEFAULT_CUDATOOLKIT_PATH__ ""
+#endif
+
+namespace {
+// Implementation of the `TargetAttrInterface` model.
+class NVVMTargetAttrImpl
+    : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
+public:
+  std::optional<SmallVector<char, 0>>
+  serializeToObject(Attribute attribute, Operation *module,
+                    const gpu::TargetOptions &options) const;
+};
+} // namespace
+
+// Register the NVVM dialect, the NVVM translation, the target interface and
+// call `LLVMInitializeNVPTX*` if possible.
+void mlir::registerNVVMTarget(DialectRegistry &registry) {
+  registerNVVMDialectTranslation(registry);
+  registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
+    NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
+  });
+  SerializeGPUModuleBase::init();
+}
+
+void mlir::registerNVVMTarget(MLIRContext &context) {
+  DialectRegistry registry;
+  registerNVVMTarget(registry);
+  context.appendDialectRegistry(registry);
+}
+
+// Search for the CUDA toolkit path.
+StringRef mlir::NVVM::getCUDAToolkitPath() {
+  if (const char *var = std::getenv("CUDA_ROOT"))
+    return var;
+  if (const char *var = std::getenv("CUDA_HOME"))
+    return var;
+  if (const char *var = std::getenv("CUDA_PATH"))
+    return var;
+  return __DEFAULT_CUDATOOLKIT_PATH__;
+}
+
+SerializeGPUModuleBase::SerializeGPUModuleBase(
+    Operation &module, NVVMTargetAttr target,
+    const gpu::TargetOptions &targetOptions)
+    : ModuleToObject(module, target.getTriple(), target.getChip(),
+                     target.getFeatures(), target.getO()),
+      target(target), toolkitPath(targetOptions.getToolkitPath()),
+      fileList(targetOptions.getBitcodeFiles()) {
+
+  // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
+  if (toolkitPath.empty())
+    toolkitPath = getCUDAToolkitPath();
+
+  // Append the files in the target attribute.
+  if (ArrayAttr files = target.getLink())
+    for (Attribute attr : files.getValue())
+      if (auto file = dyn_cast<StringAttr>(attr))
+        fileList.push_back(file.str());
+
+  // Append libdevice to the files to be loaded.
+  (void)appendStandardLibs();
+}
+
+void SerializeGPUModuleBase::init() {
+  static llvm::once_flag initializeBackendOnce;
+  llvm::call_once(initializeBackendOnce, []() {
+  // If the `NVPTX` LLVM target was built, initialize it.
+#if MLIR_CUDA_CONVERSIONS_ENABLED == 1
+    LLVMInitializeNVPTXTarget();
+    LLVMInitializeNVPTXTargetInfo();
+    LLVMInitializeNVPTXTargetMC();
+    LLVMInitializeNVPTXAsmPrinter();
+#endif
+  });
+}
+
+NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
+
+StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
+
+ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
+  return fileList;
+}
+
+// Try to append `libdevice` from a CUDA toolkit installation.
+LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
+  StringRef pathRef = getToolkitPath();
+  if (pathRef.size()) {
+    SmallVector<char, 256> path;
+    path.insert(path.begin(), pathRef.begin(), pathRef.end());
+    pathRef = StringRef(path.data(), path.size());
+    if (!llvm::sys::fs::is_directory(pathRef)) {
+      getOperation().emitError() << "CUDA path: " << pathRef
+                                 << " does not exist or is not a directory.\n";
+      return failure();
+    }
+    llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
+    pathRef = StringRef(path.data(), path.size());
+    if (!llvm::sys::fs::is_regular_file(pathRef)) {
+      getOperation().emitError() << "LibDevice path: " << pathRef
+                                 << " does not exist or is not a file.\n";
+      return failure();
+    }
+    fileList.push_back(pathRef.str());
+  }
+  return success();
+}
+
+std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
+SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module,
+                                         llvm::TargetMachine &targetMachine) {
+  SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
+  if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine,
+                                      fileList, bcFiles, true)))
+    return std::nullopt;
+  return bcFiles;
+}
+
+#ifdef MLIR_GPU_NVPTX_TARGET_ENABLED
+#define DEBUG_TYPE "serialize-to-object"
+#include <cuda.h>
+
+static void emitCudaError(const llvm::Twine &expr, const char *buffer,
+                          CUresult
result, Location loc) {
+  const char *error;
+  cuGetErrorString(result, &error);
+  emitError(loc, expr.concat(" failed with error code ")
+                     .concat(llvm::Twine{error})
+                     .concat("[")
+                     .concat(buffer)
+                     .concat("]"));
+}
+
+#define RETURN_ON_CUDA_ERROR(expr)                                             \
+  do {                                                                         \
+    if (auto status = (expr)) {                                                \
+      emitCudaError(#expr, jitErrorBuffer, status, loc);                       \
+      return {};                                                               \
+    }                                                                          \
+  } while (false)
+
+namespace {
+class SerializeToCubin : public SerializeGPUModuleBase {
+public:
+  using SerializeGPUModuleBase::SerializeGPUModuleBase;
+
+  std::optional<SmallVector<char, 0>>
+  moduleToObject(llvm::Module &llvmModule,
+                 llvm::TargetMachine &targetMachine) override;
+};
+} // namespace
+
+std::optional<SmallVector<char, 0>>
+SerializeToCubin::moduleToObject(llvm::Module &llvmModule,
+                                 llvm::TargetMachine &targetMachine) {
+  std::optional<std::string> serializedISA =
+      translateToISA(llvmModule, targetMachine);
+  if (!serializedISA) {
+    getOperation().emitError() << "Failed translating the module to ISA.";
+    return std::nullopt;
+  }
+
+  LLVM_DEBUG({
+    llvm::dbgs() << "ISA for module: "
+                 << dyn_cast<gpu::GPUModuleOp>(&getOperation()).getNameAttr()
+                 << "\n";
+    llvm::dbgs() << *serializedISA << "\n";
+    llvm::dbgs().flush();
+  });
+
+  auto loc = getOperation().getLoc();
+  char jitErrorBuffer[4096] = {0};
+
+  RETURN_ON_CUDA_ERROR(cuInit(0));
+
+  // Linking requires a device context.
+  CUdevice device;
+  RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
+  CUcontext context;
+  RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device));
+  CUlinkState linkState;
+
+  CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
+                               CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
+  void *jitOptionsVals[] = {jitErrorBuffer,
+                            reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
+
+  RETURN_ON_CUDA_ERROR(cuLinkCreate(2,              /* number of jit options */
+                                    jitOptions,     /* jit options */
+                                    jitOptionsVals, /* jit option values */
+                                    &linkState));
+
+  auto kernelName = dyn_cast<gpu::GPUModuleOp>(getOperation()).getName().str();
+  RETURN_ON_CUDA_ERROR(cuLinkAddData(
+      linkState, CUjitInputType::CU_JIT_INPUT_PTX,
+      const_cast<void *>(static_cast<const void *>(serializedISA->c_str())),
+      serializedISA->length(), kernelName.c_str(),
+      0,       /* number of jit options */
+      nullptr, /* jit options */
+      nullptr  /* jit option values */
+      ));
+
+  void *cubinData;
+  size_t cubinSize;
+  RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
+
+  char *cubinAsChar = static_cast<char *>(cubinData);
+  auto result = SmallVector<char, 0>(cubinAsChar, cubinAsChar + cubinSize);
+
+  // This will also destroy the cubin data.
+  RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
+  RETURN_ON_CUDA_ERROR(cuCtxDestroy(context));
+  return result;
+}
+#endif
+
+std::optional<SmallVector<char, 0>>
+NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
+                                      const gpu::TargetOptions &options) const {
+  assert(module && "The module must be non null.");
+  if (!module)
+    return std::nullopt;
+  if (!mlir::isa<gpu::GPUModuleOp>(module)) {
+    module->emitError("Module must be a GPU module.");
+    return std::nullopt;
+  }
+#ifdef MLIR_GPU_NVPTX_TARGET_ENABLED
+  // TODO: Replace this serializer for one ending on LLVM or PTX.
+  SerializeToCubin serializer(*module, cast<NVVMTargetAttr>(attribute),
+                              options);
+#else
+  // Serialize to LLVM bitcode.
+  SerializeGPUModuleBase serializer(*module, cast<NVVMTargetAttr>(attribute),
+                                    options);
+#endif
+  return serializer.run();
+}
diff --git a/mlir/unittests/Target/LLVM/CMakeLists.txt b/mlir/unittests/Target/LLVM/CMakeLists.txt
--- a/mlir/unittests/Target/LLVM/CMakeLists.txt
+++ b/mlir/unittests/Target/LLVM/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_mlir_unittest(MLIRTargetLLVMTests
+  SerializeNVVMTarget.cpp
   SerializeToLLVMBitcode.cpp
 )
 
diff --git a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
@@ -0,0 +1,70 @@
+//===- SerializeNVVMTarget.cpp ----------------------------------*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/InitAllDialects.h"
+#include "mlir/Parser/Parser.h"
+#include "mlir/Target/LLVM/NVVM/Target.h"
+#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
+
+#include "gmock/gmock.h"
+
+using namespace mlir;
+
+static struct LLVMInitializer {
+  LLVMInitializer() {
+    llvm::InitializeNativeTarget();
+    llvm::InitializeNativeTargetAsmPrinter();
+  }
+} initializer;
+
+TEST(MLIRTargetLLVM, SerializeNVVMModule) {
+  std::string moduleStr = R"mlir(
+      gpu.module @kernels {
+        llvm.func
@kernel(%arg0: f32) attributes {gpu.kernel, nvvm.kernel} {
+          llvm.return
+        }
+      }
+  )mlir";
+
+  DialectRegistry registry;
+  registerAllDialects(registry);
+  registerBuiltinDialectTranslation(registry);
+  registerLLVMDialectTranslation(registry);
+  registerGPUDialectTranslation(registry);
+  registerNVVMTarget(registry);
+  MLIRContext context(registry);
+
+  OwningOpRef<ModuleOp> module =
+      parseSourceString<ModuleOp>(moduleStr, &context);
+  ASSERT_TRUE(!!module);
+
+  // Create a NVVM target.
+  NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context);
+
+  // Serialize the module.
+  auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
+  ASSERT_TRUE(!!serializer);
+  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
+    std::optional<SmallVector<char, 0>> object =
+        serializer.serializeToObject(gpuModule, {});
+    // Check that the serializer was successful.
+    ASSERT_TRUE(object != std::nullopt);
+    ASSERT_TRUE(object->size() > 0);
+  }
+}