diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/ComposeSubView.h b/mlir/include/mlir/Dialect/StandardOps/Transforms/ComposeSubView.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/ComposeSubView.h
@@ -0,0 +1,30 @@
+//===- ComposeSubView.h - Combining composed subview ops --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Patterns for combining composed subview ops.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_STANDARDOPS_TRANSFORMS_COMPOSESUBVIEW_H_
+#define MLIR_DIALECT_STANDARDOPS_TRANSFORMS_COMPOSESUBVIEW_H_
+
+namespace mlir {
+
+// Forward declarations.
+class MLIRContext;
+class RewritePatternSet;
+using OwningRewritePatternList = RewritePatternSet;
+
+/// Adds to `patterns` the rewrite that replaces a memref.subview of a
+/// memref.subview with a single memref.subview of the original source.
+void populateComposeSubViewPatterns(OwningRewritePatternList &patterns,
+                                    MLIRContext *context);
+
+} // namespace mlir
+
+#endif // MLIR_DIALECT_STANDARDOPS_TRANSFORMS_COMPOSESUBVIEW_H_
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/ComposeSubView.cpp b/mlir/lib/Dialect/StandardOps/Transforms/ComposeSubView.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/StandardOps/Transforms/ComposeSubView.cpp
@@ -0,0 +1,148 @@
+//===- ComposeSubView.cpp - Combining composed subview ops ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains patterns for combining composed subview ops (i.e. subview
+// of a subview becomes a single subview).
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/StandardOps/Transforms/ComposeSubView.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+namespace mlir {
+
+namespace {
+
+// Replaces a subview of a subview with a single subview. Only supports subview
+// ops with static sizes and static strides (both static and dynamic offsets
+// are supported).
+struct ComposeSubViewOpPattern : public OpRewritePattern<memref::SubViewOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(memref::SubViewOp op,
+                                PatternRewriter &rewriter) const override {
+    // 'op' is the 'SubViewOp' we're rewriting. 'source_op' is the op that
+    // produces the input of the op we're rewriting (for 'SubViewOp' the input
+    // is called the "source" value). We can only combine them if both 'op' and
+    // 'source_op' are 'SubViewOp'.
+    auto source_op = op.source().getDefiningOp<memref::SubViewOp>();
+    if (!source_op)
+      return failure();
+
+    // A 'SubViewOp' can be "rank-reducing" by eliminating dimensions of the
+    // output memref that are statically known to be equal to 1. We do not
+    // allow 'source_op' to be a rank-reducing subview because then our two
+    // 'SubViewOp's would have different numbers of offset/size/stride
+    // parameters (just difficult to deal with, not impossible if we end up
+    // needing it).
+    if (source_op.getSourceType().getRank() != source_op.getType().getRank()) {
+      return failure();
+    }
+
+    // We only support static sizes and static strides. Check this for every
+    // dimension up front so that a failed match never leaves behind ops that
+    // were created for an earlier dimension.
+    auto is_dynamic = [](OpFoldResult value) { return value.is<Value>(); };
+    if (llvm::any_of(op.getMixedSizes(), is_dynamic) ||
+        llvm::any_of(op.getMixedStrides(), is_dynamic) ||
+        llvm::any_of(source_op.getMixedStrides(), is_dynamic)) {
+      return failure();
+    }
+
+    // Offsets, sizes and strides ValueOrAttr for the combined 'SubViewOp'.
+    SmallVector<OpFoldResult> offsets, sizes, strides;
+    auto *ctx = rewriter.getContext();
+
+    // The rules for calculating the new offsets, sizes and strides are:
+    // * Multiple strides for a given dimension compose multiplicatively.
+    //   ("Stride by m" followed by "Stride by n" == "Stride by m * n")
+    // * Multiple subview offsets for a given dimension compose additively,
+    //   but the offset of 'op' counts elements of 'source_op', so it has to be
+    //   scaled by the stride of 'source_op' first.
+    //   ("Offset by m, stride by k" followed by "Offset by n" ==
+    //   "Offset by m + n * k")
+    // * Multiple sizes for a given dimension compose by taking the size of the
+    //   final subview and ignoring the rest. ("Take m values" followed by "Take
+    //   n values" == "Take n values") This size must also be the smallest one
+    //   by definition (a subview needs to be the same size as or smaller than
+    //   its source along each dimension; presumably subviews that are larger
+    //   than their sources are disallowed by validation).
+    for (auto [op_offset, source_offset, op_size, op_stride, source_stride] :
+         llvm::zip(op.getMixedOffsets(), source_op.getMixedOffsets(),
+                   op.getMixedSizes(), op.getMixedStrides(),
+                   source_op.getMixedStrides())) {
+      int64_t op_stride_value =
+          op_stride.get<Attribute>().cast<IntegerAttr>().getInt();
+      int64_t source_stride_value =
+          source_stride.get<Attribute>().cast<IntegerAttr>().getInt();
+
+      sizes.push_back(op_size);
+      strides.push_back(
+          rewriter.getI64IntegerAttr(op_stride_value * source_stride_value));
+
+      Value op_offset_value = op_offset.dyn_cast<Value>();
+      Value source_offset_value = source_offset.dyn_cast<Value>();
+      // If both offsets are static we can simply calculate the combined
+      // offset statically.
+      if (!op_offset_value && !source_offset_value) {
+        offsets.push_back(rewriter.getI64IntegerAttr(
+            source_offset.get<Attribute>().cast<IntegerAttr>().getInt() +
+            op_offset.get<Attribute>().cast<IntegerAttr>().getInt() *
+                source_stride_value));
+        continue;
+      }
+
+      // If either offset is dynamic, we must emit an affine transformation
+      // that combines the two offsets at runtime. Any static offset must be
+      // materialized as a runtime constant first.
+      if (!op_offset_value) {
+        op_offset_value = rewriter.create<ConstantIndexOp>(
+            op.getLoc(),
+            op_offset.get<Attribute>().cast<IntegerAttr>().getInt());
+      }
+      if (!source_offset_value) {
+        source_offset_value = rewriter.create<ConstantIndexOp>(
+            op.getLoc(),
+            source_offset.get<Attribute>().cast<IntegerAttr>().getInt());
+      }
+
+      // (d0, d1) -> (d0 * source_stride + d1), applied to (op offset, source
+      // offset).
+      auto combine_offsets_map = AffineMap::get(
+          /*dimCount=*/2, /*symbolCount=*/0,
+          getAffineDimExpr(0, ctx) * source_stride_value +
+              getAffineDimExpr(1, ctx));
+      Value result = rewriter.create<AffineApplyOp>(
+          op.getLoc(), combine_offsets_map,
+          ValueRange{op_offset_value, source_offset_value});
+      offsets.push_back(result);
+    }
+
+    // This replaces 'op' but leaves 'source_op' alone; if it no longer has any
+    // uses it can be removed by a (separate) dead code elimination pass.
+    rewriter.replaceOpWithNewOp<memref::SubViewOp>(op, source_op.source(),
+                                                   offsets, sizes, strides);
+    return success();
+  }
+};
+
+} // namespace
+
+void populateComposeSubViewPatterns(OwningRewritePatternList &patterns,
+                                    MLIRContext *context) {
+  patterns.insert<ComposeSubViewOpPattern>(context);
+}
+
+} // namespace mlir
diff --git a/mlir/test/Transforms/compose-subview.mlir b/mlir/test/Transforms/compose-subview.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Transforms/compose-subview.mlir
@@ -0,0 +1,78 @@
+// RUN: mlir-opt -allow-unregistered-dialect %s -test-compose-subview -split-input-file | FileCheck %s
+
+// CHECK: [[MAP:#.*]] = affine_map<(d0, d1) -> (d0 * 1024 + d1 + 3456)
+#map0 = affine_map<(d0, d1) -> (d0 * 1024 + d1 + 2304)>
+#map1 = affine_map<(d0, d1) -> (d0 * 1024 + d1 + 3456)>
+
+func @main(%input: memref<4x1024xf32>) -> memref<1x128xf32, #map1> {
+  // CHECK: subview %arg0[3, 384] [1, 128] [1, 1]
+  // CHECK-SAME: memref<4x1024xf32> to memref<1x128xf32, [[MAP]]>
+  %0 = memref.subview %input[2, 256] [2, 256] [1, 1] : memref<4x1024xf32> to memref<2x256xf32, #map0>
+  %1 = memref.subview %0[1, 128] [1, 128] [1, 1] : memref<2x256xf32, #map0> to memref<1x128xf32, #map1>
+  return %1 : memref<1x128xf32, #map1>
+}
+
+// -----
+
+// CHECK: [[MAP:#.*]] = affine_map<(d0, d1) -> (d0 * 1024 + d1 + 3745)
+#map0 = affine_map<(d0, d1) -> (d0 * 1024 + d1 + 1536)>
+#map1 = affine_map<(d0, d1) -> (d0 * 1024 + d1 + 2688)>
+#map2 = affine_map<(d0, d1) -> (d0 * 1024 + d1 + 3745)>
+
+func @main(%input: memref<4x1024xf32>) -> memref<1x10xf32, #map2> {
+  // CHECK: subview %arg0[3, 673] [1, 10] [1, 1]
+  // CHECK-SAME: memref<4x1024xf32> to memref<1x10xf32, [[MAP]]>
+  %0 = memref.subview %input[1, 512] [3, 256] [1, 1] : memref<4x1024xf32> to memref<3x256xf32, #map0>
+  %1 = memref.subview %0[1, 128] [2, 128] [1, 1] : memref<3x256xf32, #map0> to memref<2x128xf32, #map1>
+  %2 = memref.subview %1[1, 33] [1, 10] [1, 1] : memref<2x128xf32, #map1> to memref<1x10xf32, #map2>
+  return %2 : memref<1x10xf32, #map2>
+}
+
+// -----
+
+// The offset of the second subview is scaled by the stride of the first:
+// [1 + 1 * 2, 8 + 4 * 2] = [3, 16]; strides multiply: [2 * 1, 2 * 3] = [2, 6].
+// CHECK: [[MAP:#.*]] = affine_map<(d0, d1) -> (d0 * 2048 + d1 * 6 + 3088)
+#map0 = affine_map<(d0, d1) -> (d0 * 2048 + d1 * 2 + 1032)>
+#map1 = affine_map<(d0, d1) -> (d0 * 2048 + d1 * 6 + 3088)>
+
+func @main(%input: memref<8x1024xf32>) -> memref<2x64xf32, #map1> {
+  // CHECK: subview %arg0[3, 16] [2, 64] [2, 6]
+  // CHECK-SAME: memref<8x1024xf32> to memref<2x64xf32, [[MAP]]>
+  %0 = memref.subview %input[1, 8] [4, 256] [2, 2] : memref<8x1024xf32> to memref<4x256xf32, #map0>
+  %1 = memref.subview %0[1, 4] [2, 64] [1, 3] : memref<4x256xf32, #map0> to memref<2x64xf32, #map1>
+  return %1 : memref<2x64xf32, #map1>
+}
+
+// -----
+
+// CHECK: [[MAP:#.*]] = affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)
+#map = affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>
+
+func @main(%input: memref<4x1024xf32>) -> memref<1x128xf32, #map> {
+  // CHECK: [[CST_3:%.*]] = constant 3 : index
+  %cst_1 = constant 1 : index
+  %cst_2 = constant 2 : index
+  // CHECK: subview %arg0{{\[}}[[CST_3]], 384] [1, 128] [1, 1]
+  // CHECK-SAME: memref<4x1024xf32> to memref<1x128xf32, [[MAP]]>
+  %0 = memref.subview %input[%cst_2, 256] [2, 256] [1, 1] : memref<4x1024xf32> to memref<2x256xf32, #map>
+  %1 = memref.subview %0[%cst_1, 128] [1, 128] [1, 1] : memref<2x256xf32, #map> to memref<1x128xf32, #map>
+  return %1 : memref<1x128xf32, #map>
+}
+
+// -----
+
+// CHECK: [[MAP:#.*]] = affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)
+#map = affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>
+
+func @main(%input: memref<4x1024xf32>) -> memref<1x128xf32, #map> {
+  // CHECK: [[CST_3:%.*]] = constant 3 : index
+  %cst_2 = constant 2 : index
+  // CHECK: [[CST_384:%.*]] = constant 384 : index
+  %cst_128 = constant 128 : index
+  // CHECK: subview %arg0{{\[}}[[CST_3]], [[CST_384]]] [1, 128] [1, 1]
+  // CHECK-SAME: memref<4x1024xf32> to memref<1x128xf32, [[MAP]]>
+  %0 = memref.subview %input[%cst_2, 256] [2, 256] [1, 1] : memref<4x1024xf32> to memref<2x256xf32, #map>
+  %1 = memref.subview %0[1, %cst_128] [1, 128] [1, 1] : memref<2x256xf32, #map> to memref<1x128xf32, #map>
+  return %1 : memref<1x128xf32, #map>
+}
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -12,6 +12,7 @@
   TestDataLayoutQuery.cpp
   TestDominance.cpp
   TestDynamicPipeline.cpp
+  TestComposeSubView.cpp
   TestLoopFusion.cpp
   TestGpuMemoryPromotion.cpp
   TestGpuParallelLoopMapping.cpp
diff --git a/mlir/test/lib/Transforms/TestComposeSubView.cpp b/mlir/test/lib/Transforms/TestComposeSubView.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/test/lib/Transforms/TestComposeSubView.cpp
@@ -0,0 +1,48 @@
+//===- TestComposeSubView.cpp - Test composed subviews --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to test the composed subview patterns.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/StandardOps/Transforms/ComposeSubView.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+using namespace mlir;
+
+namespace {
+// Function pass that greedily applies the patterns registered by
+// populateComposeSubViewPatterns to the function it runs on.
+struct TestComposeSubViewPass
+    : public PassWrapper<TestComposeSubViewPass, FunctionPass> {
+  void runOnFunction() override;
+  void getDependentDialects(DialectRegistry &registry) const override;
+};
+
+void TestComposeSubViewPass::getDependentDialects(
+    DialectRegistry &registry) const {
+  registry.insert<AffineDialect>();
+}
+
+void TestComposeSubViewPass::runOnFunction() {
+  OwningRewritePatternList patterns(&getContext());
+  populateComposeSubViewPatterns(patterns, &getContext());
+  (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
+}
+} // namespace
+
+namespace mlir {
+namespace test {
+void registerTestComposeSubView() {
+  PassRegistration<TestComposeSubViewPass> pass(
+      "test-compose-subview", "Test combining composed subviews");
+}
+} // namespace test
+} // namespace mlir
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -72,6 +72,7 @@
 void registerTestDominancePass();
 void registerTestDynamicPipelinePass();
 void registerTestExpandTanhPass();
+void registerTestComposeSubView();
 void registerTestGpuParallelLoopMappingPass();
 void registerTestIRVisitorsPass();
 void registerTestInterfaces();
@@ -148,6 +149,7 @@
   test::registerTestDominancePass();
   test::registerTestDynamicPipelinePass();
   test::registerTestExpandTanhPass();
+  test::registerTestComposeSubView();
   test::registerTestGpuParallelLoopMappingPass();
   test::registerTestIRVisitorsPass();
   test::registerTestInterfaces();