Add a `dump-ptx` option to the GPU serialize-to-cubin pass.

The option defaults to false. The SerializeToCubinPass constructor and
createGpuSerializeToCubinPass gain a trailing `bool dumpPtx = false`
parameter, so existing callers are unaffected. When the option is set, the
kernel name and the generated PTX text (`isa`) are written to llvm::errs()
immediately before the PTX is passed to cuLinkAddData.

diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -117,6 +117,9 @@
       *this, "gpu-binary-annotation",
       llvm::cl::desc("Annotation attribute string for GPU binary"),
       llvm::cl::init(getDefaultGpuBinaryAnnotation())};
+  Option<bool> dumpPtx{*this, "dump-ptx",
+                       ::llvm::cl::desc("Dump generated PTX"),
+                       llvm::cl::init(false)};
 };
 
 } // namespace gpu
@@ -137,7 +140,8 @@
 std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
                                                     StringRef chip,
                                                     StringRef features,
-                                                    int optLevel = 2);
+                                                    int optLevel = 2,
+                                                    bool dumpPtx = false);
 
 /// Create an instance of the GPU kernel function to HSAco binary serialization
 /// pass.
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
@@ -50,7 +50,7 @@
   SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",
                        StringRef chip = "sm_35",
                        StringRef features = "+ptx60",
-                       int optLevel = 2);
+                       int optLevel = 2, bool dumpPtx = false);
 
   StringRef getArgument() const override { return "gpu-to-cubin"; }
   StringRef getDescription() const override {
@@ -73,10 +73,12 @@
 }
 
 SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,
-                                           StringRef features, int optLevel) {
+                                           StringRef features, int optLevel,
+                                           bool dumpPtx) {
   maybeSetOption(this->triple, triple);
   maybeSetOption(this->chip, chip);
   maybeSetOption(this->features, features);
+  this->dumpPtx = dumpPtx;
   if (this->optLevel.getNumOccurrences() == 0)
     this->optLevel.setValue(optLevel);
 }
@@ -112,6 +114,10 @@
                                     &linkState));
 
   auto kernelName = getOperation().getName().str();
+  if (dumpPtx) {
+    llvm::errs() << " Kernel Name : [" << kernelName << "]\n";
+    llvm::errs() << isa << "\n";
+  }
   RETURN_ON_CUDA_ERROR(cuLinkAddData(
       linkState, CUjitInputType::CU_JIT_INPUT_PTX,
       const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
@@ -151,9 +157,10 @@
 std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,
                                                           StringRef arch,
                                                           StringRef features,
-                                                          int optLevel) {
+                                                          int optLevel,
+                                                          bool dumpPtx) {
   return std::make_unique<SerializeToCubinPass>(triple, arch, features,
-                                                optLevel);
+                                                optLevel, dumpPtx);
 }
 
 #else // MLIR_GPU_TO_CUBIN_PASS_ENABLE