diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h --- a/mlir/include/mlir/Dialect/GPU/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Passes.h @@ -25,8 +25,9 @@ namespace mlir { /// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into /// a separate kernel function. -std::unique_ptr> -createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef()); +std::unique_ptr> createGpuKernelOutliningPass( + StringRef dataLayoutStr = StringRef(), + std::function isSinkingBeneficiary = nullptr); /// Rewrites a function region so that GPU ops execute asynchronously. std::unique_ptr> createGpuAsyncRegionPass(); diff --git a/mlir/include/mlir/Dialect/GPU/Utils.h b/mlir/include/mlir/Dialect/GPU/Utils.h --- a/mlir/include/mlir/Dialect/GPU/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/Utils.h @@ -38,7 +38,9 @@ /// Sink operations into the `launchOp` to reduce the number of values that are /// used within the region of the operation, but defined outside of the /// region. -LogicalResult sinkOperationsIntoLaunchOp(gpu::LaunchOp launchOp); +LogicalResult sinkOperationsIntoLaunchOp( + gpu::LaunchOp launchOp, + llvm::function_ref isSinkingBeneficiary = nullptr); } // namespace mlir #endif // MLIR_DIALECT_GPU_UTILS_H_ diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -59,7 +59,7 @@ /// Identifies operations that are beneficial to sink into kernels. These /// operations may not have side-effects, as otherwise sinking (and hence /// duplicating them) is not legal. -static bool isSinkingBeneficiary(Operation *op) { +static bool isSinkingBeneficiaryDefault(Operation *op) { return isa(op); } @@ -75,11 +75,11 @@ /// the order they should appear in the kernel. Furthermore, `availableValues` /// is updated with results that will be available after sinking the identified /// ops. -static bool -extractBeneficiaryOps(Operation *op, - const SetVector &existingDependencies, - SetVector &beneficiaryOps, - llvm::SmallPtrSetImpl &availableValues) { +static bool extractBeneficiaryOps( + Operation *op, const SetVector &existingDependencies, + SetVector &beneficiaryOps, + llvm::SmallPtrSetImpl &availableValues, + llvm::function_ref isSinkingBeneficiary) { if (beneficiaryOps.count(op)) return true; @@ -93,9 +93,9 @@ // Else check whether it can be made available via sinking or already is a // dependency. Operation *definingOp = operand.getDefiningOp(); - if ((!definingOp || - !extractBeneficiaryOps(definingOp, existingDependencies, - beneficiaryOps, availableValues)) && + if ((!definingOp || !extractBeneficiaryOps(definingOp, existingDependencies, + beneficiaryOps, availableValues, + isSinkingBeneficiary)) && !existingDependencies.count(operand)) return false; } @@ -106,7 +106,9 @@ return true; } -LogicalResult mlir::sinkOperationsIntoLaunchOp(gpu::LaunchOp launchOp) { +LogicalResult mlir::sinkOperationsIntoLaunchOp( + gpu::LaunchOp launchOp, + llvm::function_ref isSinkingBeneficiary) { Region &launchOpBody = launchOp.body(); // Identify uses from values defined outside of the scope of the launch @@ -120,7 +122,9 @@ Operation *operandOp = operand.getDefiningOp(); if (!operandOp) continue; - extractBeneficiaryOps(operandOp, sinkCandidates, toBeSunk, availableValues); + extractBeneficiaryOps(operandOp, sinkCandidates, toBeSunk, availableValues, + isSinkingBeneficiary ? isSinkingBeneficiary + : isSinkingBeneficiaryDefault); } // Insert operations so that the defs get cloned before uses. @@ -240,13 +244,16 @@ class GpuKernelOutliningPass : public GpuKernelOutliningBase { public: - GpuKernelOutliningPass(StringRef dlStr) { + GpuKernelOutliningPass(StringRef dlStr, + std::function isSinkingBeneficiary_) + : isSinkingBeneficiary(std::move(isSinkingBeneficiary_)) { if (!dlStr.empty() && !dataLayoutStr.hasValue()) dataLayoutStr = dlStr.str(); } GpuKernelOutliningPass(const GpuKernelOutliningPass &other) - : dataLayoutSpec(other.dataLayoutSpec) { + : dataLayoutSpec(other.dataLayoutSpec), + isSinkingBeneficiary(other.isSinkingBeneficiary) { dataLayoutStr = other.dataLayoutStr; } @@ -277,8 +284,13 @@ Twine(op->getParentOfType().getName(), "_kernel").str(); // Pull in instructions that can be sunk - if (failed(sinkOperationsIntoLaunchOp(op))) - return WalkResult::interrupt(); + if (isSinkingBeneficiary) { + if (failed(sinkOperationsIntoLaunchOp(op, isSinkingBeneficiary))) + return WalkResult::interrupt(); + } else { + if (failed(sinkOperationsIntoLaunchOp(op))) + return WalkResult::interrupt(); + } gpu::GPUFuncOp outlinedFunc = outlineKernelFuncImpl(op, kernelFnName, operands); @@ -352,11 +364,14 @@ "attached to the GPU kernel module")}; DataLayoutSpecInterface dataLayoutSpec; + std::function isSinkingBeneficiary; }; } // namespace -std::unique_ptr> -mlir::createGpuKernelOutliningPass(StringRef dataLayoutStr) { - return std::make_unique(dataLayoutStr); +std::unique_ptr> mlir::createGpuKernelOutliningPass( + StringRef dataLayoutStr, + std::function isSinkingBeneficiary) { + return std::make_unique( + dataLayoutStr, std::move(isSinkingBeneficiary)); }