diff --git a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
--- a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
+++ b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s --test-kernel-to-cubin -split-input-file | FileCheck %s
+// RUN: mlir-opt %s --test-gpu-to-cubin | FileCheck %s
 
-// CHECK: attributes {nvvm.cubin = "CUBIN"}
+// CHECK: gpu.module @foo attributes {gpu.binary = "CUBIN"}
 gpu.module @foo {
   llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr<f32>)
     // CHECK: attributes  {gpu.kernel}
@@ -9,8 +9,7 @@
   }
 }
 
-// -----
-
+// CHECK: gpu.module @bar attributes {gpu.binary = "CUBIN"}
 gpu.module @bar {
   // CHECK: func @kernel_a
   llvm.func @kernel_a()
diff --git a/mlir/test/lib/Transforms/TestConvertGPUKernelToCubin.cpp b/mlir/test/lib/Transforms/TestConvertGPUKernelToCubin.cpp
--- a/mlir/test/lib/Transforms/TestConvertGPUKernelToCubin.cpp
+++ b/mlir/test/lib/Transforms/TestConvertGPUKernelToCubin.cpp
@@ -6,11 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/Dialect/GPU/Passes.h"
+
 #include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
 #include "llvm/Support/TargetSelect.h"
@@ -18,36 +16,53 @@
 using namespace mlir;
 
 #if MLIR_CUDA_CONVERSIONS_ENABLED
-static OwnedBlob compilePtxToCubinForTesting(const std::string &, Location,
-                                             StringRef) {
-  const char data[] = "CUBIN";
-  return std::make_unique<std::vector<char>>(data, data + sizeof(data) - 1);
+namespace {
+class TestSerializeToCubinPass
+    : public PassWrapper<TestSerializeToCubinPass, gpu::SerializeToBlobPass> {
+public:
+  TestSerializeToCubinPass();
+
+private:
+  void getDependentDialects(DialectRegistry &registry) const override;
+
+  // Test stub: ignores the PTX input and returns the fixed bytes "CUBIN".
+  std::unique_ptr<std::vector<char>>
+  serializeISA(const std::string &isa) override;
+};
+} // namespace
+
+TestSerializeToCubinPass::TestSerializeToCubinPass() {
+  this->triple = "nvptx64-nvidia-cuda";
+  this->chip = "sm_35";
+  this->features = "+ptx60";
+}
+
+void TestSerializeToCubinPass::getDependentDialects(
+    DialectRegistry &registry) const {
+  registerNVVMDialectTranslation(registry);
+  gpu::SerializeToBlobPass::getDependentDialects(registry);
 }
 
-static std::unique_ptr<llvm::Module>
-translateModuleToNVVMIR(Operation *m, llvm::LLVMContext &llvmContext,
-                        StringRef moduleName) {
-  registerLLVMDialectTranslation(*m->getContext());
-  registerNVVMDialectTranslation(*m->getContext());
-  return translateModuleToLLVMIR(m, llvmContext, moduleName);
+std::unique_ptr<std::vector<char>>
+TestSerializeToCubinPass::serializeISA(const std::string &) {
+  std::string data = "CUBIN"; // Placeholder payload; the PTX input is unused.
+  return std::make_unique<std::vector<char>>(data.begin(), data.end());
 }
 
 namespace mlir {
 namespace test {
-void registerTestConvertGPUKernelToCubinPass() {
-  PassPipelineRegistration<>(
-      "test-kernel-to-cubin",
-      "Convert all kernel functions to CUDA cubin blobs",
-      [](OpPassManager &pm) {
+// Registers the test pass that serializes a GPU module to a CUBIN annotation.
+void registerTestGpuSerializeToCubinPass() {
+  PassRegistration<TestSerializeToCubinPass> registerSerializeToCubin(
+      "test-gpu-to-cubin",
+      "Lower GPU kernel function to CUBIN binary annotations", [] {
         // Initialize LLVM NVPTX backend.
         LLVMInitializeNVPTXTarget();
         LLVMInitializeNVPTXTargetInfo();
         LLVMInitializeNVPTXTargetMC();
         LLVMInitializeNVPTXAsmPrinter();
 
-        pm.addPass(createConvertGPUKernelToBlobPass(
-            translateModuleToNVVMIR, compilePtxToCubinForTesting,
-            "nvptx64-nvidia-cuda", "sm_35", "+ptx60", "nvvm.cubin"));
+        return std::make_unique<TestSerializeToCubinPass>();
       });
 }
 } // namespace test
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -64,7 +64,7 @@
 void registerTestCallGraphPass();
 void registerTestConstantFold();
 void registerTestConvVectorization();
-void registerTestConvertGPUKernelToCubinPass();
+void registerTestGpuSerializeToCubinPass();
 void registerTestConvertGPUKernelToHsacoPass();
 void registerTestDecomposeCallGraphTypes();
 void registerTestDialect(DialectRegistry &);
@@ -136,7 +136,7 @@
   test::registerTestCallGraphPass();
   test::registerTestConstantFold();
 #if MLIR_CUDA_CONVERSIONS_ENABLED
-  test::registerTestConvertGPUKernelToCubinPass();
+  test::registerTestGpuSerializeToCubinPass();
 #endif
 #if MLIR_ROCM_CONVERSIONS_ENABLED
   test::registerTestConvertGPUKernelToHsacoPass();