Changeset View
Changeset View
Standalone View
Standalone View
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
//===- KernelOutlining.cpp - Implementation of GPU kernel outlining -------===// | //===- KernelOutlining.cpp - Implementation of GPU kernel outlining -------===// | ||||
Lint: Lint: clang-format-diff not found in user's PATH; not linting file. | |||||
// | // | ||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
// See https://llvm.org/LICENSE.txt for license information. | // See https://llvm.org/LICENSE.txt for license information. | ||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
// | // | ||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||
// | // | ||||
▲ Show 20 Lines • Show All 200 Lines • ▼ Show 20 Lines | |||||
/// | /// | ||||
/// This pass moves the kernel code of each LaunchOp into a function created | /// This pass moves the kernel code of each LaunchOp into a function created | ||||
/// inside a nested module. It also creates an external function of the same | /// inside a nested module. It also creates an external function of the same | ||||
/// name in the parent module. | /// name in the parent module. | ||||
/// | /// | ||||
/// The gpu.modules are intended to be compiled to a cubin blob independently in | /// The gpu.modules are intended to be compiled to a cubin blob independently in | ||||
/// a separate pass. The external functions can then be annotated with the | /// a separate pass. The external functions can then be annotated with the | ||||
/// symbol of the cubin accessor function. | /// symbol of the cubin accessor function. | ||||
class GpuKernelOutliningPass : public ModulePass<GpuKernelOutliningPass> { | class GpuKernelOutliningPass | ||||
: public OperationPass<GpuKernelOutliningPass, ModuleOp> { | |||||
public: | public: | ||||
/// Include the generated pass utilities. | /// Include the generated pass utilities. | ||||
#define GEN_PASS_GpuKernelOutlining | #define GEN_PASS_GpuKernelOutlining | ||||
#include "mlir/Dialect/GPU/Passes.h.inc" | #include "mlir/Dialect/GPU/Passes.h.inc" | ||||
void runOnModule() override { | void runOnOperation() override { | ||||
SymbolTable symbolTable(getModule()); | SymbolTable symbolTable(getOperation()); | ||||
bool modified = false; | bool modified = false; | ||||
for (auto func : getModule().getOps<FuncOp>()) { | for (auto func : getOperation().getOps<FuncOp>()) { | ||||
// Insert just after the function. | // Insert just after the function. | ||||
Block::iterator insertPt(func.getOperation()->getNextNode()); | Block::iterator insertPt(func.getOperation()->getNextNode()); | ||||
auto funcWalkResult = func.walk([&](gpu::LaunchOp op) { | auto funcWalkResult = func.walk([&](gpu::LaunchOp op) { | ||||
llvm::SetVector<Value> operands; | llvm::SetVector<Value> operands; | ||||
std::string kernelFnName = | std::string kernelFnName = | ||||
Twine(op.getParentOfType<FuncOp>().getName(), "_kernel").str(); | Twine(op.getParentOfType<FuncOp>().getName(), "_kernel").str(); | ||||
// Pull in instructions that can be sunk | // Pull in instructions that can be sunk | ||||
Show All 15 Lines | for (auto func : getOperation().getOps<FuncOp>()) { | ||||
}); | }); | ||||
if (funcWalkResult.wasInterrupted()) | if (funcWalkResult.wasInterrupted()) | ||||
return signalPassFailure(); | return signalPassFailure(); | ||||
} | } | ||||
// If any new module was inserted in this module, annotate this module as | // If any new module was inserted in this module, annotate this module as | ||||
// a container module. | // a container module. | ||||
if (modified) | if (modified) | ||||
getModule().setAttr(gpu::GPUDialect::getContainerModuleAttrName(), | getOperation().setAttr(gpu::GPUDialect::getContainerModuleAttrName(), | ||||
UnitAttr::get(&getContext())); | UnitAttr::get(&getContext())); | ||||
} | } | ||||
private: | private: | ||||
// Returns a gpu.module containing kernelFunc and all callees (recursive). | // Returns a gpu.module containing kernelFunc and all callees (recursive). | ||||
gpu::GPUModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc, | gpu::GPUModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc, | ||||
const SymbolTable &parentSymbolTable) { | const SymbolTable &parentSymbolTable) { | ||||
// TODO: This code cannot use an OpBuilder because it must be inserted into | // TODO: This code cannot use an OpBuilder because it must be inserted into | ||||
// a SymbolTable by the caller. SymbolTable needs to be refactored to | // a SymbolTable by the caller. SymbolTable needs to be refactored to | ||||
// prevent manual building of Ops with symbols in code using SymbolTables | // prevent manual building of Ops with symbols in code using SymbolTables | ||||
// and then this needs to use the OpBuilder. | // and then this needs to use the OpBuilder. | ||||
auto context = getModule().getContext(); | auto context = getOperation().getContext(); | ||||
Builder builder(context); | Builder builder(context); | ||||
OperationState state(kernelFunc.getLoc(), | OperationState state(kernelFunc.getLoc(), | ||||
gpu::GPUModuleOp::getOperationName()); | gpu::GPUModuleOp::getOperationName()); | ||||
gpu::GPUModuleOp::build(&builder, state, kernelFunc.getName()); | gpu::GPUModuleOp::build(&builder, state, kernelFunc.getName()); | ||||
auto kernelModule = cast<gpu::GPUModuleOp>(Operation::create(state)); | auto kernelModule = cast<gpu::GPUModuleOp>(Operation::create(state)); | ||||
SymbolTable symbolTable(kernelModule); | SymbolTable symbolTable(kernelModule); | ||||
symbolTable.insert(kernelFunc); | symbolTable.insert(kernelFunc); | ||||
Show All 27 Lines |
clang-format-diff not found in user's PATH; not linting file.