diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h --- a/mlir/include/mlir/Dialect/GPU/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Passes.h @@ -87,6 +87,12 @@ *this, "gpu-binary-annotation", llvm::cl::desc("Annotation attribute string for GPU binary"), llvm::cl::init(getDefaultGpuBinaryAnnotation())}; + Option dumpAsm{ + *this, "dump-asm", + llvm::cl::desc("Whether the final generated instructions or intermediate " + "IR (if stopping early) for a kernel should be dumped to " + "the debug stream"), + llvm::cl::init(false)}; }; } // namespace gpu @@ -102,6 +108,14 @@ /// annotation. void registerGpuSerializeToHsacoPass(); +/// Create an instance of the GPU kernel function to HSAco binary serialization +/// pass +std::unique_ptr createGpuSerializeToHsacoPass(StringRef triple, + StringRef arch, + StringRef features, + int optLevel, + bool dumpAsm = false); + /// Generate the code for registering passes. #define GEN_PASS_REGISTRATION #include "mlir/Dialect/GPU/Passes.h.inc" diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp @@ -21,8 +21,15 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetMachine.h" +#include +#include + using namespace mlir; +// Ensure multiple threads don't try to simultaneously dump the assembly for +// separate modules +static std::mutex dumpAsmLock; + std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; } gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID) @@ -76,6 +83,12 @@ std::string targetISA = std::move(maybeTargetISA.getValue()); + if (dumpAsm.getValue()) { + const std::lock_guard lock(dumpAsmLock); + llvm::dbgs() << targetISA << "\n"; + llvm::dbgs().flush(); + } + // Serialize the target ISA. 
std::unique_ptr> blob = serializeISA(targetISA); if (!blob) diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp @@ -61,7 +61,7 @@ : public PassWrapper { public: SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features, - int optLevel); + int optLevel, bool dumpAsm = false); SerializeToHsacoPass(const SerializeToHsacoPass &other); StringRef getArgument() const override { return "gpu-to-hsaco"; } StringRef getDescription() const override { @@ -127,12 +127,15 @@ } SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch, - StringRef features, int optLevel) { + StringRef features, int optLevel, + bool dumpAsm) { maybeSetOption(this->triple, [&triple] { return triple.str(); }); maybeSetOption(this->chip, [&arch] { return arch.str(); }); maybeSetOption(this->features, [&features] { return features.str(); }); if (this->optLevel.getNumOccurrences() == 0) this->optLevel.setValue(optLevel); + if (this->dumpAsm.getNumOccurrences() == 0 && dumpAsm) + this->dumpAsm.setValue(dumpAsm); } void SerializeToHsacoPass::getDependentDialects( @@ -479,6 +482,18 @@ "", 2); }); } + +/// Create an instance of the GPU kernel function to HSAco binary serialization +/// pass. `dumpAsm` requests dumping the target ISA to llvm::dbgs(). +std::unique_ptr mlir::createGpuSerializeToHsacoPass(StringRef triple, + StringRef arch, + StringRef features, + int optLevel, + bool dumpAsm) { + return std::make_unique(triple, arch, features, + optLevel, dumpAsm); +} + #else // MLIR_GPU_TO_HSACO_PASS_ENABLE void mlir::registerGpuSerializeToHsacoPass() {} #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE diff --git a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir --- a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir +++ 
b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir @@ -1,6 +1,9 @@ // RUN: mlir-opt %s --test-gpu-to-hsaco | FileCheck %s +// RUN: mlir-opt %s --test-gpu-to-hsaco=dump-asm=true 2>&1 |\ +// RUN: FileCheck %s --check-prefix=CHECK-ASM // CHECK: gpu.module @foo attributes {gpu.binary = "HSACO"} +// CHECK-ASM: .globl kernel gpu.module @foo { llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr) // CHECK: attributes {gpu.kernel} @@ -23,3 +26,4 @@ llvm.return } } +// CHECK-ASM: amdhsa.target: