diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
--- a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
+++ b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
@@ -26,6 +26,8 @@
 
 namespace gpu {
 class GPUModuleOp;
+
+std::string getDefaultGpuBinaryAnnotation();
 } // namespace gpu
 
 namespace LLVM {
@@ -45,12 +47,12 @@
 /// instead uses a small wrapper library that exports a stable and conveniently
 /// typed ABI on top of GPU runtimes such as CUDA or ROCm (HIP).
 std::unique_ptr<OperationPass<ModuleOp>>
-createGpuToLLVMConversionPass(StringRef gpuBinaryAnnotation = "");
+createGpuToLLVMConversionPass(StringRef gpuBinaryAnnotation = {});
 
 /// Collect a set of patterns to convert from the GPU dialect to LLVM.
 void populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
                                          OwningRewritePatternList &patterns,
-                                         StringRef gpuBinaryAnnotation);
+                                         StringRef gpuBinaryAnnotation = {});
 
 /// Creates a pass to convert kernel functions into GPU target object blobs.
 ///
@@ -78,7 +80,7 @@
 createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,
                                  BlobGenerator blobGenerator, StringRef triple,
                                  StringRef targetChip, StringRef features,
-                                 StringRef gpuBinaryAnnotation);
+                                 StringRef gpuBinaryAnnotation = {});
 
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -106,10 +106,6 @@
   let summary = "Convert GPU dialect to LLVM dialect with GPU runtime calls";
   let constructor = "mlir::createGpuToLLVMConversionPass()";
   let dependentDialects = ["LLVM::LLVMDialect"];
-  let options = [
-    Option<"gpuBinaryAnnotation", "gpu-binary-annotation", "std::string",
-           "", "Annotation attribute string for GPU binary">,
-  ];
 }
 
 def LowerHostCodeToLLVM : Pass<"lower-host-to-llvm", "ModuleOp"> {
diff --git a/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp b/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
--- a/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
+++ b/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
@@ -51,9 +51,20 @@
                       BlobGenerator blobGenerator, StringRef triple,
                       StringRef targetChip, StringRef features,
                       StringRef gpuBinaryAnnotation)
-      : loweringCallback(loweringCallback), blobGenerator(blobGenerator),
-        triple(triple), targetChip(targetChip), features(features),
-        blobAnnotation(gpuBinaryAnnotation) {}
+      : loweringCallback(loweringCallback), blobGenerator(blobGenerator) {
+    if (!triple.empty())
+      this->triple = triple.str();
+    if (!targetChip.empty())
+      this->targetChip = targetChip.str();
+    if (!features.empty())
+      this->features = features.str();
+    if (!gpuBinaryAnnotation.empty())
+      this->gpuBinaryAnnotation = gpuBinaryAnnotation.str();
+  }
+
+  GpuKernelToBlobPass(const GpuKernelToBlobPass &other)
+      : loweringCallback(other.loweringCallback),
+        blobGenerator(other.blobGenerator) {}
 
   void runOnOperation() override {
     gpu::GPUModuleOp module = getOperation();
@@ -70,7 +81,7 @@
     // attribute to the module.
    if (auto blobAttr = translateGPUModuleToBinaryAnnotation(
            *llvmModule, module.getLoc(), module.getName()))
-      module->setAttr(blobAnnotation, blobAttr);
+      module->setAttr(gpuBinaryAnnotation, blobAttr);
     else
       signalPassFailure();
   }
@@ -92,14 +103,26 @@
 
   LoweringCallback loweringCallback;
   BlobGenerator blobGenerator;
-  llvm::Triple triple;
-  std::string targetChip;
-  std::string features;
-  std::string blobAnnotation;
+
+  Option<std::string> triple{*this, "triple",
+                             ::llvm::cl::desc("Target triple")};
+
+  Option<std::string> targetChip{*this, "target-chip",
+                                 ::llvm::cl::desc("Target architecture")};
+
+  Option<std::string> features{*this, "features",
+                               ::llvm::cl::desc("Target features")};
+
+  Option<std::string> gpuBinaryAnnotation{
+      *this, "gpu-binary-annotation",
+      llvm::cl::desc("Annotation attribute string for GPU binary"),
+      llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())};
 };
 
 } // anonymous namespace
 
+std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }
+
 std::string GpuKernelToBlobPass::translateModuleToISA(
     llvm::Module &module, llvm::TargetMachine &targetMachine) {
diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
--- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
+++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
@@ -48,8 +48,17 @@
       this->gpuBinaryAnnotation = gpuBinaryAnnotation.str();
   }
 
+  GpuToLLVMConversionPass(const GpuToLLVMConversionPass &other)
+      : GpuToLLVMConversionPassBase(other) {}
+
   // Run the dialect converter on the module.
   void runOnOperation() override;
+
+private:
+  Option<std::string> gpuBinaryAnnotation{
+      *this, "gpu-binary-annotation",
+      llvm::cl::desc("Annotation attribute string for GPU binary"),
+      llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())};
 };
 
 struct FunctionCallBuilder {
diff --git a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
--- a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
+++ b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
@@ -112,7 +112,8 @@
     : public mlir::PassPipelineOptions {
   Option<std::string> gpuBinaryAnnotation{
       *this, "gpu-binary-annotation",
-      llvm::cl::desc("Annotation attribute string for GPU binary")};
+      llvm::cl::desc("Annotation attribute string for GPU binary"),
+      llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())};
 };
 
 // Register cuda-runner specific passes.
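
Note: with the defaults above, callers no longer need to thread an annotation string through the pass constructors; an empty StringRef falls back to gpu::getDefaultGpuBinaryAnnotation(), i.e. "gpu.binary". A minimal sketch of how a host-side pipeline might rely on this (the helper name addGpuLoweringPasses and the attribute name "my.gpu.blob" are hypothetical, not part of this patch):

#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Pass/PassManager.h"

// Illustrative pipeline setup: lower gpu.launch_func to GPU runtime calls.
// With no argument the pass reads the kernel blob from the default
// "gpu.binary" attribute; passing a string overrides the attribute name.
static void addGpuLoweringPasses(mlir::PassManager &pm) {
  pm.addPass(mlir::createGpuToLLVMConversionPass());
  // Equivalent, with an explicit (hypothetical) attribute name:
  // pm.addPass(mlir::createGpuToLLVMConversionPass("my.gpu.blob"));
}

The same knob is also exposed as the pass's gpu-binary-annotation option, so textual pipelines can override it without any C++ changes.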