diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h @@ -0,0 +1,165 @@ +//===- CodegenStrategy.h - Linalg programmable codegen strategy -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LINALG_TRANSFORMS_CODEGENSTRATEGY_H_ +#define MLIR_DIALECT_LINALG_TRANSFORMS_CODEGENSTRATEGY_H_ + +#include "mlir/Conversion/VectorToSCF/VectorToSCF.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" + +namespace mlir { + +class FuncOp; + +namespace linalg { + +/// Abstract Transformation class applied in a sequence that also handles state +/// through markers. +struct Transformation { + virtual ~Transformation() = default; + virtual OwningRewritePatternList + buildRewritePatterns(MLIRContext *context, linalg::LinalgMarker m) = 0; + linalg::LinalgMarker marker; +}; + +/// Tiling transformation enqueues a particular stage-1 pattern for +/// `Tile` with the appropriate `options`. +template +struct Tile : public Transformation { + explicit Tile(linalg::LinalgTilingOptions options) : options(options) {} + + OwningRewritePatternList + buildRewritePatterns(MLIRContext *context, linalg::LinalgMarker m) override { + OwningRewritePatternList tilingPatterns; + tilingPatterns.insert>( + context, options, m); + return tilingPatterns; + } + +private: + linalg::LinalgTilingOptions options; +}; + +/// Promotion transformation enqueues a particular stage-1 pattern for +/// `Promote` with the appropriate `options`. 
+template +struct Promote : public Transformation { + explicit Promote(linalg::LinalgPromotionOptions options) : options(options) {} + + OwningRewritePatternList + buildRewritePatterns(MLIRContext *context, linalg::LinalgMarker m) override { + OwningRewritePatternList promotionPatterns; + promotionPatterns.insert>( + context, options, m); + return promotionPatterns; + } + +private: + linalg::LinalgPromotionOptions options; +}; + +/// Vectorization transformation enqueues a particular stage-1 pattern for +/// `LinalgVectorizationPattern` as well as copy to vector +/// transfer rewrite forwarding patterns. +template +struct Vectorize : public Transformation { + OwningRewritePatternList + buildRewritePatterns(MLIRContext *context, linalg::LinalgMarker m) override { + OwningRewritePatternList vectorizationPatterns; + // FillOp may interfere with forwarding patterns atm, so we bump up the + // priority of LinalgCopyVTRForwardingPattern / + // LinalgCopyVTWForwardingPattern. + vectorizationPatterns + .insert>(context, m); + vectorizationPatterns.insert( + context, /*benefit=*/2); + return vectorizationPatterns; + } +}; + +/// Codegen strategy controls how a Linalg op is progressively lowered. +/// The application uses a 3-level staged patterns strategy which allows +/// ordering transformations by using the Linalg `applyStagedPatterns` function, +/// where: +/// 1. The first stage consists of the successive `tile`, `promote` and +/// `vectorize` patterns, applied sequentially. +/// 2. The second stage consists of common local canonicalization patterns +/// that are applied eagerly after each stage-1 pattern. +/// 3. The third stage consists of more global transformations, also applied +/// eagerly, after all stage-2 patterns (e.g. LICM and hoisting). +struct CodegenStrategy { + /// Append a pattern to add a level of tiling for `LinalgOpType` with tiling + /// `options`. 
+ template + CodegenStrategy &tile(linalg::LinalgTilingOptions options) { + transformationSequence.emplace_back(new Tile(options)); + return *this; + } + /// Conditionally append a pattern to add a level of tiling for `LinalgOpType` + /// with tiling `options`. + template + CodegenStrategy &tileIf(bool b, linalg::LinalgTilingOptions options) { + return b ? tile(options) : *this; + } + /// Append a pattern to add a level of promotion for `LinalgOpType` with + /// promotion `options`. + template + CodegenStrategy &promote(linalg::LinalgPromotionOptions options) { + transformationSequence.emplace_back(new Promote(options)); + return *this; + } + /// Conditionally append a pattern to add a level of promotion for + /// `LinalgOpType` with promotion `options`. + /// This is a no-op when `b` is false. + template + CodegenStrategy &promoteIf(bool b, linalg::LinalgPromotionOptions options) { + return b ? promote(options) : *this; + } + /// Append a pattern to rewrite `LinalgOpType` as a vector operation. + template + CodegenStrategy &vectorize() { + transformationSequence.emplace_back(new Vectorize()); + return *this; + } + /// Conditionally append a pattern to rewrite `LinalgOpType` as a vector + /// operation. + /// This is a no-op when `b` is false. + template + CodegenStrategy &vectorizeIf(bool b) { + return b ? vectorize() : *this; + } + /// Configure the post staged-patterns late vector transformations. + CodegenStrategy & + setVectorTransformsOptions(vector::VectorTransformsOptions options) { + vectorTransformsOptions = options; + return *this; + } + /// Configure the post staged-patterns late vector.transfer to scf conversion. + CodegenStrategy & + setVectorTransferToSCFOptions(VectorTransferToSCFOptions options) { + vectorToSCFOptions = options; + return *this; + } + + /// Apply the transformation patterns in sequence with cleanup transformations + /// interleaved. 
+ void transform(FuncOp func) const; + +private: + LogicalResult postPatternTransforms(Operation *func) const; + + vector::VectorTransformsOptions vectorTransformsOptions; + VectorTransferToSCFOptions vectorToSCFOptions; + SmallVector, 4> transformationSequence; +}; + +} // namespace linalg +} // namespace mlir + +#endif // MLIR_DIALECT_LINALG_TRANSFORMS_CODEGENSTRATEGY_H_ diff --git a/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt b/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt --- a/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt @@ -10,7 +10,6 @@ LINK_LIBS PUBLIC MLIREDSC MLIRAffineEDSC - MLIRLinalgUtils MLIRLLVMIR MLIRTransforms ) diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -16,7 +16,6 @@ #include "../PassDetail.h" #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" -#include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/SCF/EDSC/Builders.h" #include "mlir/Dialect/SCF/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" @@ -41,6 +40,28 @@ using vector::TransferReadOp; using vector::TransferWriteOp; +// Return a list of Values that correspond to multiple AffineApplyOp, one for +// each result of `map`. Each `expr` in `map` is canonicalized and folded +// greedily according to its operands. +// TODO: factor out in a common location that both linalg and vector can use. +static SmallVector +applyMapToValues(OpBuilder &b, Location loc, AffineMap map, ValueRange values) { + SmallVector res; + res.reserve(map.getNumResults()); + unsigned numDims = map.getNumDims(), numSym = map.getNumSymbols(); + // For each `expr` in `map`, applies the `expr` to the values extracted from + // ranges. If the resulting application can be folded into a Value, the + // folding occurs eagerly. 
Otherwise, an affine.apply operation is emitted. + for (auto expr : map.getResults()) { + AffineMap map = AffineMap::get(numDims, numSym, expr); + SmallVector operands(values.begin(), values.end()); + fullyComposeAffineMapAndOperands(&map, &operands); + canonicalizeMapAndOperands(&map, &operands); + res.push_back(b.createOrFold(loc, map, operands)); + } + return res; +} + namespace { /// Helper class captures the common information needed to lower N>1-D vector /// transfer operations (read and write). @@ -193,7 +214,8 @@ /// 1. Compute the indexings `majorIvs + majorOffsets` and save them in /// `majorIvsPlusOffsets`. -/// 2. Return a value of i1 that determines whether the first `majorIvs.rank()` +/// 2. Return a value of i1 that determines whether the first +/// `majorIvs.rank()` /// dimensions `majorIvs + majorOffsets` are all within `memrefBounds`. static Value emitInBoundsCondition(PatternRewriter &rewriter, @@ -205,8 +227,8 @@ majorIvsPlusOffsets.reserve(majorIvs.size()); unsigned idx = 0; SmallVector bounds = - linalg::applyMapToValues(rewriter, xferOp.getLoc(), - xferOp.permutation_map(), memrefBounds.getUbs()); + applyMapToValues(rewriter, xferOp.getLoc(), xferOp.permutation_map(), + memrefBounds.getUbs()); for (auto it : llvm::zip(majorIvs, majorOffsets, bounds)) { Value iv = std::get<0>(it), off = std::get<1>(it), ub = std::get<2>(it); using namespace mlir::edsc::op; @@ -450,8 +472,8 @@ function_ref)> outOfBoundsFun = nullptr) { // Permute the incoming indices according to the permutation map. SmallVector indices = - linalg::applyMapToValues(rewriter, transfer.getLoc(), - transfer.permutation_map(), transfer.indices()); + applyMapToValues(rewriter, transfer.getLoc(), transfer.permutation_map(), + transfer.indices()); // Generate a bounds check if necessary. 
SmallVector majorIvsPlusOffsets; diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_dialect_library(MLIRLinalgTransforms + CodegenStrategy.cpp DropUnitDims.cpp Fusion.cpp FusionOnTensors.cpp @@ -31,6 +32,7 @@ MLIRPass MLIRStandard MLIRStandardToLLVM + MLIRTransforms MLIRTransformUtils MLIRVector ) diff --git a/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp b/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp @@ -0,0 +1,95 @@ +//===- CodegenStrategy.cpp - Linalg programmable codegen strategy ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements logic and helpers to expose Linalg transforms as +// composable rewrite patterns through a programmable CodegenStrategy object. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Transforms/CodegenStrategy.h" + +#include "mlir/Dialect/Linalg/Transforms/Hoisting.h" +#include "mlir/Dialect/Vector/VectorOps.h" +#include "mlir/Dialect/Vector/VectorTransforms.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/LoopUtils.h" +#include "mlir/Transforms/Passes.h" + +using namespace mlir; +using namespace mlir::linalg; + +#define DEBUG_TYPE "linalg-codegen-strategy" + +void mlir::linalg::CodegenStrategy::transform(FuncOp func) const { + MLIRContext *context = func.getContext(); + // Emplace patterns one at a time while also maintaining a simple chained + // state transition. + unsigned stepCount = 0; + SmallVector stage1Patterns; + auto zeroState = Identifier::get(std::to_string(stepCount), context); + auto currentState = zeroState; + for (auto &t : transformationSequence) { + auto nextState = Identifier::get(std::to_string(++stepCount), context); + auto marker = (currentState == zeroState) + ? linalg::LinalgMarker({}, nextState) + : linalg::LinalgMarker(currentState, nextState); + stage1Patterns.emplace_back(t->buildRewritePatterns(context, marker)); + currentState = nextState; + } + + OwningRewritePatternList stage2Patterns = + linalg::getLinalgTilingCanonicalizationPatterns(context); + stage2Patterns.insert(context); + + auto stage3Transforms = [](Operation *op) { + // Some of these may be too aggressive as a stage 3 that is applied on each + // stage 1 application and may have to be split out to post staged patterns + // application (in which case they could just be passes, TBD). 
+ PassManager pm(op->getContext()); + pm.addPass(createLoopInvariantCodeMotionPass()); + if (failed(pm.run(op->getParentOfType()))) + llvm_unreachable("Unexpected failure in cleanup pass pipeline."); + promoteSingleIterationLoops(cast(op)); + hoistViewAllocOps(cast(op)); + hoistRedundantVectorTransfers(cast(op)); + return success(); + }; + linalg::applyStagedPatterns(func, stage1Patterns, stage2Patterns, + stage3Transforms); + + //===--------------------------------------------------------------------===// + // Post staged patterns transforms + //===--------------------------------------------------------------------===// + + ModuleOp module = func.getParentOfType(); + + // Programmatic splitting of slow/fast path vector transfers. + OwningRewritePatternList patterns; + patterns.insert( + context, vectorTransformsOptions); + applyPatternsAndFoldGreedily(module, patterns); + + // Programmatic controlled lowering of vector.contract only. + OwningRewritePatternList vectorContractLoweringPatterns; + vectorContractLoweringPatterns + .insert( + vectorTransformsOptions, context); + applyPatternsAndFoldGreedily(module, vectorContractLoweringPatterns); + + // Programmatic controlled lowering of vector.transfer only. + OwningRewritePatternList vectorToLoopsPatterns; + populateVectorToSCFConversionPatterns(vectorToLoopsPatterns, context, + vectorToSCFOptions); + applyPatternsAndFoldGreedily(module, vectorToLoopsPatterns); + + // Ensure we drop the marker in the end. 
+ module.walk([](LinalgOp op) { + op.removeAttr(LinalgTransforms::kLinalgTransformMarker); + }); +} diff --git a/mlir/test/Dialect/Linalg/codegen-strategy.mlir b/mlir/test/Dialect/Linalg/codegen-strategy.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/Linalg/codegen-strategy.mlir @@ -0,0 +1,18 @@ +// RUN: mlir-opt %s -test-linalg-codegen-strategy="tile-sizes=2,4,8 vectorize vectorize-contraction-to=matrixintrinsics unroll-vector-transfers=true" | FileCheck %s +// RUN: mlir-opt %s -test-linalg-codegen-strategy="tile-sizes=16,32,64 promote promote-full-tile-pad register-tile-sizes=2,4,8 vectorize vectorize-contraction-to=outerproduct split-transfers=true unroll-vector-transfers=false" | FileCheck %s --check-prefix=OUTER + +// CHECK-LABEL: func @matmul( +// OUTER-LABEL: func @matmul( +func @matmul(%A: memref<1584x1584xf32>, %B: memref<1584x1584xf32>, %C: memref<1584x1584xf32>) { + linalg.matmul + ins(%A, %B: memref<1584x1584xf32>, memref<1584x1584xf32>) + outs(%C: memref<1584x1584xf32>) + + // CHECK: vector.matrix_multiply + // CHECK-SAME: {lhs_columns = 8 : i32, lhs_rows = 2 : i32, rhs_columns = 4 : i32} + // CHECK-SAME: (vector<16xf32>, vector<32xf32>) -> vector<8xf32> + + // OUTER: vector.outerproduct {{.*}} : vector<2xf32>, vector<4xf32> + return +} + diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt --- a/mlir/test/lib/Transforms/CMakeLists.txt +++ b/mlir/test/lib/Transforms/CMakeLists.txt @@ -16,6 +16,7 @@ TestGpuMemoryPromotion.cpp TestGpuParallelLoopMapping.cpp TestInlining.cpp + TestLinalgCodegenStrategy.cpp TestLinalgFusionTransforms.cpp TestLinalgHoisting.cpp TestLinalgTransforms.cpp diff --git a/mlir/test/lib/Transforms/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Transforms/TestLinalgCodegenStrategy.cpp new file mode 100644 --- /dev/null +++ b/mlir/test/lib/Transforms/TestLinalgCodegenStrategy.cpp @@ -0,0 +1,150 @@ +//===- TestLinalgCodegenStrategy.cpp - Test Linalg codegen strategy 
-------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements logic for testing the Linalg codegen strategy. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/GPU/GPUDialect.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/Linalg/Transforms/CodegenStrategy.h" +#include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Vector/VectorOps.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" + +#include "llvm/ADT/SetVector.h" + +using namespace mlir; +using namespace mlir::linalg; + +namespace { +struct TestLinalgCodegenStrategy + : public PassWrapper { + TestLinalgCodegenStrategy() = default; + TestLinalgCodegenStrategy(const TestLinalgCodegenStrategy &pass) {} + + void getDependentDialects(DialectRegistry ®istry) const override { + // clang-format off + registry.insert(); + // clang-format on + } + + void runOnFunction() override; + + ListOption tileSizes{*this, "tile-sizes", + llvm::cl::MiscFlags::CommaSeparated, + llvm::cl::desc("Specifies the tile sizes.")}; + Option promote{ + *this, "promote", + llvm::cl::desc("Promote the tile into a small aligned memory buffer."), + llvm::cl::init(false)}; + Option promoteFullTile{ + *this, "promote-full-tile-pad", + llvm::cl::desc("Pad the small aligned memory buffer to the tile sizes."), + llvm::cl::init(false)}; + ListOption registerTileSizes{ + *this, "register-tile-sizes", llvm::cl::MiscFlags::CommaSeparated, + llvm::cl::desc( + "Specifies the size of the register tile that will be used " + " to vectorize")}; + Option registerPromote{ + *this, 
"register-promote", + llvm::cl::desc( + "Promote the register tile into a small aligned memory buffer."), + llvm::cl::init(false)}; + Option registerPromoteFullTile{ + *this, "register-promote-full-tile-pad", + llvm::cl::desc("Pad the small aligned memory buffer to the tile sizes."), + llvm::cl::init(false)}; + Option vectorize{ + *this, "vectorize", + llvm::cl::desc("Rewrite the linalg op as a vector operation."), + llvm::cl::init(false)}; + Option splitVectorTransfersTo{ + *this, "split-transfers", + llvm::cl::desc( + "Split vector transfers between slow (masked) and fast " + "(unmasked) variants. Possible options are:\n" + "\tnone: keep unsplit vector.transfer and pay the full price\n" + "\tlinalg-copy: use linalg.fill + linalg.copy for the slow path\n" + "\tvector-transfers: use extra small unmasked vector.transfer for" + " the slow path\n"), + llvm::cl::init("none")}; + Option vectorizeContractionTo{ + *this, "vectorize-contraction-to", + llvm::cl::desc("the type of vector op to use for linalg contractions"), + llvm::cl::init("outerproduct")}; + Option unrollVectorTransfers{ + *this, "unroll-vector-transfers", + llvm::cl::desc("Enable full unrolling of vector.transfer operations"), + llvm::cl::init(false)}; +}; +} // end anonymous namespace + +/// Apply transformations specified as patterns. 
+void TestLinalgCodegenStrategy::runOnFunction() { + LinalgTilingOptions tilingOptions; + if (!tileSizes.empty()) + tilingOptions = tilingOptions.setTileSizes(tileSizes); + + LinalgTilingOptions registerTilingOptions; + if (!registerTileSizes.empty()) + registerTilingOptions = + registerTilingOptions.setTileSizes(registerTileSizes); + + vector::VectorContractLowering vectorContractLowering = + llvm::StringSwitch( + vectorizeContractionTo.getValue()) + .Case("matrixintrinsics", vector::VectorContractLowering::Matmul) + .Case("dot", vector::VectorContractLowering::Dot) + .Case("outerproduct", vector::VectorContractLowering::OuterProduct) + .Default(vector::VectorContractLowering::OuterProduct); + vector::VectorTransferSplit vectorTransferSplit = + llvm::StringSwitch( + splitVectorTransfersTo.getValue()) + .Case("none", vector::VectorTransferSplit::None) + .Case("linalg-copy", vector::VectorTransferSplit::LinalgCopy) + .Case("vector-transfers", vector::VectorTransferSplit::VectorTransfer) + .Default(vector::VectorTransferSplit::None); + + CodegenStrategy strategy; + strategy.tileIf(!tileSizes.empty(), tilingOptions) + .promoteIf(promote, + LinalgPromotionOptions() + .setAlignment(16) + .setUseFullTileBuffersByDefault(promoteFullTile)) + .tileIf(!registerTileSizes.empty(), registerTilingOptions) + .promoteIf(registerPromote, LinalgPromotionOptions() + .setAlignment(16) + .setUseFullTileBuffersByDefault( + registerPromoteFullTile)) + .vectorizeIf(vectorize) + .setVectorTransformsOptions( + vector::VectorTransformsOptions() + .setVectorTransformsOptions(vectorContractLowering) + .setVectorTransferSplit(vectorTransferSplit)) + .setVectorTransferToSCFOptions( + VectorTransferToSCFOptions().setUnroll(unrollVectorTransfers)); + + strategy.transform(getFunction()); +} + +namespace mlir { +void registerTestLinalgCodegenStrategy() { + PassRegistration testLinalgCodegenStrategyPass( + "test-linalg-codegen-strategy", "Test Linalg Codegen Strategy."); +} +} // namespace mlir diff 
--git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -58,6 +58,7 @@ void registerTestGpuMemoryPromotionPass(); void registerTestGpuParallelLoopMappingPass(); void registerTestInterfaces(); +void registerTestLinalgCodegenStrategy(); void registerTestLinalgFusionTransforms(); void registerTestLinalgHoisting(); void registerTestLinalgTransforms(); @@ -77,7 +78,6 @@ void registerTestReducer(); void registerTestSpirvEntryPointABIPass(); void registerTestSCFUtilsPass(); -void registerTestTraitsPass(); void registerTestVectorConversions(); void registerVectorizerTestPass(); } // namespace mlir @@ -116,6 +116,7 @@ registerTestExpandTanhPass(); registerTestGpuMemoryPromotionPass(); registerTestInterfaces(); + registerTestLinalgCodegenStrategy(); registerTestLinalgFusionTransforms(); registerTestLinalgHoisting(); registerTestLinalgTransforms(); @@ -135,7 +136,6 @@ registerTestGpuParallelLoopMappingPass(); registerTestSpirvEntryPointABIPass(); registerTestSCFUtilsPass(); - registerTestTraitsPass(); registerTestVectorConversions(); registerVectorizerTestPass(); }