diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h --- a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h +++ b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h @@ -26,9 +26,6 @@ namespace gpu { class GPUModuleOp; - -/// Returns the default annotation name for GPU binary blobs. -std::string getDefaultGpuBinaryAnnotation(); } // namespace gpu namespace LLVM { diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h --- a/mlir/include/mlir/Dialect/GPU/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Passes.h @@ -13,8 +13,15 @@ #ifndef MLIR_DIALECT_GPU_PASSES_H_ #define MLIR_DIALECT_GPU_PASSES_H_ +#include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Pass/Pass.h" +namespace llvm { +class TargetMachine; +class LLVMContext; +class Module; +} // namespace llvm + namespace mlir { /// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into /// a separate kernel function. @@ -33,6 +40,45 @@ populateGpuAllReducePatterns(context, patterns); } +namespace gpu { +/// Returns the default annotation name for GPU binary blobs. +std::string getDefaultGpuBinaryAnnotation(); + +/// Base pass class to serialize kernel functions through LLVM into +/// user-specified IR and add the resulting blob as module attribute. +class SerializeToBlobPass : public OperationPass { +public: + SerializeToBlobPass(TypeID passID); + SerializeToBlobPass(const SerializeToBlobPass &other); + + void runOnOperation() final; + +protected: + // Creates an LLVM target machine from given arguments. + static std::unique_ptr + createTargetMachine(Location loc, StringRef triple, StringRef chip, + StringRef features); + +private: + // Creates the LLVM target machine to generate the ISA. + virtual std::unique_ptr createTargetMachine() = 0; + + // Translates the 'getOperation()' result to an LLVM module.
+ virtual std::unique_ptr + translateToLLVMIR(llvm::LLVMContext &llvmContext) = 0; + + // Serializes the target ISA to binary form. + virtual std::unique_ptr> + serializeISA(const std::string &isa) = 0; + +protected: + Option gpuBinaryAnnotation{ + *this, "gpu-binary-annotation", + llvm::cl::desc("Annotation attribute string for GPU binary"), + llvm::cl::init(getDefaultGpuBinaryAnnotation())}; +}; +} // namespace gpu + //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp b/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp --- a/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp @@ -15,6 +15,7 @@ #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Dialect/GPU/GPUDialect.h" +#include "mlir/Dialect/GPU/Passes.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" @@ -45,7 +46,7 @@ /// GPU binary code, which is then attached as an attribute to the function. /// The function body is erased. class GpuKernelToBlobPass - : public PassWrapper> { + : public PassWrapper { public: GpuKernelToBlobPass(LoweringCallback loweringCallback, BlobGenerator blobGenerator, StringRef triple, @@ -57,114 +58,36 @@ this->gpuBinaryAnnotation = gpuBinaryAnnotation.str(); } - GpuKernelToBlobPass(const GpuKernelToBlobPass &other) - : loweringCallback(other.loweringCallback), - blobGenerator(other.blobGenerator), triple(other.triple), - targetChip(other.targetChip), features(other.features) {} - - void runOnOperation() override { - gpu::GPUModuleOp module = getOperation(); - - // Lower the module to an LLVM IR module using a separate context to enable - // multi-threaded processing.
- llvm::LLVMContext llvmContext; - std::unique_ptr llvmModule = - loweringCallback(module, llvmContext, "LLVMDialectModule"); - if (!llvmModule) - return signalPassFailure(); - - // Translate the llvm module to a target blob and attach the result as - // attribute to the module. - if (auto blobAttr = translateGPUModuleToBinaryAnnotation( - *llvmModule, module.getLoc(), module.getName())) - module->setAttr(gpuBinaryAnnotation, blobAttr); - else - signalPassFailure(); - } - private: - std::string translateModuleToISA(llvm::Module &module, - llvm::TargetMachine &targetMachine); + // Creates the LLVM target machine to generate the ISA. + std::unique_ptr createTargetMachine() override { + return gpu::SerializeToBlobPass::createTargetMachine( + getOperation()->getLoc(), triple, targetChip, features); + } - /// Converts llvmModule to a blob with target instructions using the - /// user-provided generator. Location is used for error reporting and name is - /// forwarded to the blob generator to use in its logging mechanisms. - OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc, - StringRef name); + // Translates the 'getOperation()' result to an LLVM module. + std::unique_ptr + translateToLLVMIR(llvm::LLVMContext &llvmContext) override { + return loweringCallback(getOperation(), llvmContext, "LLVMDialectModule"); + } - /// Translates llvmModule to a blob with target instructions and returns the - /// result as attribute. - StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule, - Location loc, StringRef name); + // Serializes the target ISA to binary form.
+ std::unique_ptr> + serializeISA(const std::string &isa) override { + return blobGenerator(isa, getOperation()->getLoc(), + getOperation().getName()); + } LoweringCallback loweringCallback; BlobGenerator blobGenerator; - llvm::Triple triple; + std::string triple; std::string targetChip; std::string features; - - Option gpuBinaryAnnotation{ - *this, "gpu-binary-annotation", - llvm::cl::desc("Annotation attribute string for GPU binary"), - llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())}; }; } // anonymous namespace -std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; } - -std::string -GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module, - llvm::TargetMachine &targetMachine) { - std::string targetISA; - { - llvm::raw_string_ostream stream(targetISA); - llvm::buffer_ostream pstream(stream); - llvm::legacy::PassManager codegenPasses; - targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, - llvm::CGFT_AssemblyFile); - codegenPasses.run(module); - } - - return targetISA; -} - -OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule, - Location loc, - StringRef name) { - std::unique_ptr targetMachine; - { - std::string error; - const llvm::Target *target = - llvm::TargetRegistry::lookupTarget("", triple, error); - if (target == nullptr) { - emitError(loc, "cannot initialize target triple"); - return {}; - } - targetMachine.reset(target->createTargetMachine(triple.str(), targetChip, - features, {}, {})); - if (targetMachine == nullptr) { - emitError(loc, "cannot initialize target machine"); - return {}; - } - } - - llvmModule.setDataLayout(targetMachine->createDataLayout()); - - auto targetISA = translateModuleToISA(llvmModule, *targetMachine); - - return blobGenerator(targetISA, loc, name); -} - -StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation( - llvm::Module &llvmModule, Location loc, StringRef name) { - auto blob = convertModuleToBlob(llvmModule, loc, name); - if (!blob) - return
- {}; - return StringAttr::get(loc->getContext(), {blob->data(), blob->size()}); -} - std::unique_ptr> mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback, BlobGenerator blobGenerator, diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -20,6 +20,7 @@ #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" #include "mlir/Dialect/Async/IR/Async.h" #include "mlir/Dialect/GPU/GPUDialect.h" +#include "mlir/Dialect/GPU/Passes.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -5,6 +5,7 @@ Transforms/KernelOutlining.cpp Transforms/MemoryPromotion.cpp Transforms/ParallelLoopMapper.cpp + Transforms/SerializeToBlob.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp @@ -0,0 +1,104 @@ +//===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a base class for a pass to convert gpu kernel functions +// into a corresponding binary blob that can be executed on a GPU.
Currently +// only translates the function itself but no dependencies. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/GPU/Passes.h" +#include "mlir/Pass/Pass.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" + +using namespace mlir; + +std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; } + +gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID) + : OperationPass(passID) {} + +gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other) + : OperationPass(other) {} + +// Emits target assembly for 'llvmModule' using 'targetMachine'. Returns +// llvm::None if the target machine cannot emit assembly files. +static Optional<std::string> translateToISA(llvm::Module &llvmModule, + llvm::TargetMachine &targetMachine) { + llvmModule.setDataLayout(targetMachine.createDataLayout()); + + std::string targetISA; + llvm::raw_string_ostream stream(targetISA); + { + // 'pstream' only forwards its buffered output to 'stream' when it is + // destroyed, so it must go out of scope before the result is read. + llvm::buffer_ostream pstream(stream); + llvm::legacy::PassManager codegenPasses; + // addPassesToEmitFile() returns true if the file type is not supported. + if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, + llvm::CGFT_AssemblyFile)) + return llvm::None; + codegenPasses.run(llvmModule); + } + // str() flushes 'stream' into 'targetISA' before the string is copied out. + return stream.str(); +} + +void gpu::SerializeToBlobPass::runOnOperation() { + // Lower the module to an LLVM IR module using a separate context to enable + // multi-threaded processing. + llvm::LLVMContext llvmContext; + std::unique_ptr llvmModule = translateToLLVMIR(llvmContext); + if (!llvmModule) + return signalPassFailure(); + + // Lower the LLVM IR module to target ISA. + std::unique_ptr targetMachine = createTargetMachine(); + if (!targetMachine) + return signalPassFailure(); + + Optional<std::string> targetISA = + translateToISA(*llvmModule, *targetMachine); + if (!targetISA) { + getOperation().emitError("failed to translate LLVM IR to target ISA"); + return signalPassFailure(); + } + + // Serialize the target ISA. + std::unique_ptr> blob = serializeISA(*targetISA); + if (!blob) + return signalPassFailure(); + + // Add the blob as module attribute.
+ auto attr = StringAttr::get(&getContext(), {blob->data(), blob->size()}); + getOperation()->setAttr(gpuBinaryAnnotation, attr); +} + +std::unique_ptr +gpu::SerializeToBlobPass::createTargetMachine(Location loc, StringRef triple, + StringRef chip, + StringRef features) { + std::string error; + const llvm::Target *target = + llvm::TargetRegistry::lookupTarget(triple.str(), error); + if (!target) { + emitError(loc, Twine("failed to lookup target: ") + error); + return {}; + } + llvm::TargetMachine *machine = + target->createTargetMachine(triple.str(), chip, features, {}, {}); + if (!machine) { + emitError(loc, "failed to create target machine"); + return {}; + } + + return std::unique_ptr{machine}; +} diff --git a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp --- a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp +++ b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp @@ -114,8 +114,7 @@ : public mlir::PassPipelineOptions { Option gpuBinaryAnnotation{ *this, "gpu-binary-annotation", - llvm::cl::desc("Annotation attribute string for GPU binary"), - llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())}; + llvm::cl::desc("Annotation attribute string for GPU binary")}; }; // Register cuda-runner specific passes.