diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h --- a/mlir/include/mlir/Dialect/GPU/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Passes.h @@ -54,14 +54,23 @@ protected: void getDependentDialects(DialectRegistry ®istry) const override; -private: - /// Creates the LLVM target machine to generate the ISA. - std::unique_ptr createTargetMachine(); + /// Translates the module to ISA + virtual Optional + translateToISA(llvm::Module &llvmModule, llvm::TargetMachine &targetMachine); + + /// Hook allowing the application of optimizations before codegen + /// By default, does nothing + virtual LogicalResult optimizeLlvm(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine); /// Translates the 'getOperation()' result to an LLVM module. virtual std::unique_ptr translateToLLVMIR(llvm::LLVMContext &llvmContext); +private: + /// Creates the LLVM target machine to generate the ISA. + std::unique_ptr createTargetMachine(); + /// Serializes the target ISA to binary form. virtual std::unique_ptr> serializeISA(const std::string &isa) = 0; diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -162,6 +162,7 @@ target_link_libraries(MLIRGPUOps PRIVATE lldELF + MLIRExecutionEngine MLIRROCDLToLLVMIRTranslation ) diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp @@ -31,18 +31,28 @@ gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other) : OperationPass(other) {} -static std::string translateToISA(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) { +Optional +gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) { llvmModule.setDataLayout(targetMachine.createDataLayout()); + if (failed(optimizeLlvm(llvmModule, targetMachine))) { + return llvm::None; + } std::string targetISA; llvm::raw_string_ostream stream(targetISA); - llvm::buffer_ostream pstream(stream); + llvm::legacy::PassManager codegenPasses; - targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, - llvm::CGFT_AssemblyFile); - codegenPasses.run(llvmModule); - return targetISA; + + { // Drop pstream after this to prevent the ISA from being stuck buffering + llvm::buffer_ostream pstream(stream); + if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, + llvm::CGFT_AssemblyFile)) { + return llvm::None; + } + codegenPasses.run(llvmModule); + } + return stream.str(); } void gpu::SerializeToBlobPass::runOnOperation() { @@ -58,7 +68,13 @@ if (!targetMachine) return signalPassFailure(); - std::string targetISA = translateToISA(*llvmModule, *targetMachine); + Optional maybeTargetISA = + translateToISA(*llvmModule, *targetMachine); + + if (!maybeTargetISA.hasValue()) { + return signalPassFailure(); + } + std::string targetISA = maybeTargetISA.getValue(); // Serialize the target ISA. std::unique_ptr> blob = serializeISA(targetISA); @@ -71,6 +87,14 @@ getOperation()->setAttr(gpuBinaryAnnotation, attr); } +LogicalResult +gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) { + // TODO: If serializeToCubin ends up defining optimizations, factor them + // into here from SerializeToHsaco + return success(); +} + void gpu::SerializeToBlobPass::getDependentDialects( DialectRegistry ®istry) const { registerLLVMDialectTranslation(registry); diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/GPU/Passes.h" #if MLIR_GPU_TO_HSACO_PASS_ENABLE +#include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/Pass/Pass.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" @@ -132,7 +133,8 @@ } SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch, - StringRef features) { + StringRef features, int optLevel) + : optLevel(optLevel) { maybeSetOption(this->triple, [&triple] { return triple.str(); }); maybeSetOption(this->chip, [&arch] { return arch.str(); }); maybeSetOption(this->features, [&features] { return features.str(); }); @@ -144,6 +146,30 @@ gpu::SerializeToBlobPass::getDependentDialects(registry); } +LogicalResult +SerializeToHsacoPass::optimizeLlvm(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) { + if (optLevel < 0 || optLevel > 3) { + llvm::errs() << "Invalid optimization level passed to SerializeToHsaco: " + << optLevel << "\n"; + return failure(); + } + targetMachine.setOptLevel(static_cast(optLevel)); + + auto transformer = + makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); + auto error = transformer(&llvmModule); + if (error) { + llvm::handleAllErrors(std::move(error), [](const llvm::ErrorInfoBase &ei) { + llvm::errs() << "Could not optimize LLVM IR: "; + ei.log(llvm::errs()); + llvm::errs() << "\n"; + }); + return failure(); + } + return success(); +} + std::unique_ptr> SerializeToHsacoPass::assembleIsa(const std::string &isa) { auto loc = getOperation().getLoc(); @@ -280,7 +306,7 @@ LLVMInitializeAMDGPUTargetMC(); return std::make_unique("amdgcn-amd-amdhsa", "", - ""); + "", 2); }); } #else // MLIR_GPU_TO_HSACO_PASS_ENABLE