diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -117,6 +117,9 @@
       *this, "gpu-binary-annotation",
       llvm::cl::desc("Annotation attribute string for GPU binary"),
       llvm::cl::init(getDefaultGpuBinaryAnnotation())};
+  Option<bool> dumpPtx{*this, "dump-ptx",
+                       ::llvm::cl::desc("Dump generated PTX"),
+                       llvm::cl::init(false)};
 };
 } // namespace gpu
 
@@ -137,7 +140,8 @@
 std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
                                                     StringRef chip,
                                                     StringRef features,
-                                                    int optLevel = 2);
+                                                    int optLevel = 2,
+                                                    bool dumpPtx = false);
 
 /// Create an instance of the GPU kernel function to HSAco binary serialization
 /// pass.
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "llvm/Support/Debug.h"
 
 #if MLIR_GPU_TO_CUBIN_PASS_ENABLE
 #include "mlir/Pass/Pass.h"
@@ -50,7 +51,7 @@
   SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",
                        StringRef chip = "sm_35",
                        StringRef features = "+ptx60",
-                       int optLevel = 2);
+                       int optLevel = 2, bool dumpPtx = false);
 
   StringRef getArgument() const override { return "gpu-to-cubin"; }
   StringRef getDescription() const override {
@@ -73,10 +74,12 @@
 }
 
 SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,
-                                           StringRef features, int optLevel) {
+                                           StringRef features, int optLevel,
+                                           bool dumpPtx) {
   maybeSetOption(this->triple, triple);
   maybeSetOption(this->chip, chip);
   maybeSetOption(this->features, features);
+  this->dumpPtx = dumpPtx;
   if (this->optLevel.getNumOccurrences() == 0)
     this->optLevel.setValue(optLevel);
 }
@@ -112,6 +115,10 @@
                                     &linkState));
 
   auto kernelName = getOperation().getName().str();
+  if (dumpPtx) {
+    llvm::dbgs() << " Kernel Name : [" << kernelName << "]\n";
+    llvm::dbgs() << isa << "\n";
+  }
   RETURN_ON_CUDA_ERROR(cuLinkAddData(
       linkState, CUjitInputType::CU_JIT_INPUT_PTX,
       const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
@@ -151,9 +158,10 @@
 std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,
                                                           StringRef arch,
                                                           StringRef features,
-                                                          int optLevel) {
+                                                          int optLevel,
+                                                          bool dumpPtx) {
   return std::make_unique<SerializeToCubinPass>(triple, arch, features,
-                                                optLevel);
+                                                optLevel, dumpPtx);
 }
 
 #else // MLIR_GPU_TO_CUBIN_PASS_ENABLE
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir
@@ -0,0 +1,15 @@
+// RUN: mlir-opt %s \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{dump-ptx}))' \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: Generated by LLVM NVPTX Back-End
+// CHECK: .visible .func kernel_a()
+// CHECK: ret;
+
+gpu.module @bar {
+  llvm.func @kernel_a()
+      attributes { gpu.kernel } {
+    llvm.return
+  }
+}