diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -64,7 +64,7 @@
 def LinalgBufferize : Pass<"linalg-bufferize", "ModuleOp"> {
   let summary = "Bufferize the linalg dialect";
   let constructor = "mlir::createLinalgBufferizePass()";
-  let dependentDialects = ["linalg::LinalgDialect", "vector::VectorDialect"];
+  let dependentDialects = ["linalg::LinalgDialect"];
 }
 
 def LinalgLowerToParallelLoops
diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
--- a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
@@ -35,6 +35,9 @@
 /// Creates an instance of func bufferization pass.
 std::unique_ptr<Pass> createFuncBufferizePass();
 
+/// Creates an instance of tensor constant bufferization pass.
+std::unique_ptr<Pass> createTensorConstantBufferizePass();
+
 /// Creates an instance of the StdExpand pass that legalizes Std
 /// dialect ops to be convertible to LLVM. For example,
 /// `std.ceildivi_signed` gets transformed to a number of std operations,
diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td
--- a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td
@@ -51,4 +51,17 @@
   let constructor = "mlir::createFuncBufferizePass()";
 }
 
+def TensorConstantBufferize : Pass<"tensor-constant-bufferize", "ModuleOp"> {
+  let summary = "Bufferize tensor constants.";
+  let description = [{
+    This pass bufferizes tensor constants.
+
+    This pass needs to be a module pass because it inserts std.global_memref
+    ops into the module, which cannot be done safely from a function pass due
+    to multi-threading. Most other bufferization passes can run in parallel at
+    function granularity.
+  }];
+  let constructor = "mlir::createTensorConstantBufferizePass()";
+}
+
 #endif // MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES
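Concretely, the new pass rewrites a tensor-valued constant into a module-level global plus a per-use load. The IR below is a sketch in the op syntax current at this revision; the function is illustrative, while the global's name and form follow the naming scheme exercised by the tests further down in this patch. Input:

    func @basic() -> tensor<3x4xf32> {
      %0 = constant dense<7.0> : tensor<3x4xf32>
      return %0 : tensor<3x4xf32>
    }

becomes:

    global_memref "private" constant @__constant_3x4xf32 : memref<3x4xf32> = dense<7.000000e+00>

    func @basic() -> tensor<3x4xf32> {
      %0 = get_global_memref @__constant_3x4xf32 : memref<3x4xf32>
      %1 = tensor_load %0 : memref<3x4xf32>
      return %1 : tensor<3x4xf32>
    }

The tensor_load is the usual source materialization that BufferizeTypeConverter inserts for uses that are still tensor-typed mid-bufferization.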
diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-elementwise.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-elementwise.mlir
--- a/mlir/integration_test/Dialect/Linalg/CPU/test-elementwise.mlir
+++ b/mlir/integration_test/Dialect/Linalg/CPU/test-elementwise.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-elementwise-to-linalg -std-bufferize -linalg-bufferize -func-bufferize -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-elementwise-to-linalg -std-bufferize -tensor-constant-bufferize -linalg-bufferize -func-bufferize -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
--- a/mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
+++ b/mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -linalg-bufferize -std-bufferize -func-bufferize \
+// RUN: mlir-opt %s -linalg-bufferize -std-bufferize -tensor-constant-bufferize -func-bufferize \
 // RUN:   -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert.mlir
@@ -0,0 +1,22 @@
+// RUN: mlir-opt %s -linalg-bufferize -std-bufferize -tensor-constant-bufferize -func-bufferize \
+// RUN:   -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+func @main() {
+  %const = constant dense<10.0> : tensor<2xf32>
+  %insert_val = constant dense<20.0> : tensor<1xf32>
+  %inserted = subtensor_insert %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
+
+  %unranked = tensor_cast %inserted : tensor<2xf32> to tensor<*xf32>
+  call @print_memref_f32(%unranked) : (tensor<*xf32>) -> ()
+
+  // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}}
+  // CHECK-SAME: rank = 1 offset = 0 sizes = [2] strides = [1] data =
+  // CHECK-NEXT: [20, 10]
+
+  return
+}
+
+func @print_memref_f32(%ptr : tensor<*xf32>)
diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-e2e.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-e2e.mlir
--- a/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-e2e.mlir
+++ b/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-e2e.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -std-bufferize -linalg-bufferize -func-bufferize -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -tensor-constant-bufferize -std-bufferize -linalg-bufferize -func-bufferize -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir
--- a/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir
+++ b/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir
@@ -1,11 +1,11 @@
-// RUN: mlir-opt %s -linalg-bufferize -std-bufferize -func-bufferize \
+// RUN: mlir-opt %s -linalg-bufferize -std-bufferize -tensor-constant-bufferize -func-bufferize \
 // RUN:   -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,2,3" -linalg-bufferize \
-// RUN:   -scf-bufferize -std-bufferize -func-bufferize -convert-linalg-to-loops \
+// RUN:   -scf-bufferize -std-bufferize -tensor-constant-bufferize -func-bufferize -convert-linalg-to-loops \
 // RUN:   -convert-scf-to-std -convert-linalg-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
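These RUN-line updates are all the same change: -tensor-constant-bufferize now has to appear somewhere in each bufferization pipeline, because linalg-bufferize no longer converts tensor-valued constants (see the Bufferize.cpp change below). Without it, an op such as

    %cst = constant dense<10.0> : tensor<2xf32>

would come out of bufferization still tensor-typed, and the later -convert-std-to-llvm has no lowering for tensor-typed constants. As the updated RUN lines show, the pass's position relative to the other partial bufferization passes is flexible; they are designed to compose in any order.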
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
@@ -325,60 +325,6 @@
     return success();
   }
 };
-
-/// TensorConstantOp conversion inserts a linearized 1-D vector constant that
-/// is stored in memory. A linalg.reshape is introduced to convert to the
-/// desired n-D buffer form.
-class TensorConstantOpConverter : public OpConversionPattern<ConstantOp> {
-public:
-  using OpConversionPattern<ConstantOp>::OpConversionPattern;
-
-  LogicalResult
-  matchAndRewrite(ConstantOp op, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const final {
-
-    RankedTensorType rankedTensorType =
-        op.getType().dyn_cast<RankedTensorType>();
-    if (!rankedTensorType)
-      return failure();
-    if (llvm::any_of(rankedTensorType.getShape(), [](int64_t s) {
-          return s == 0 || ShapedType::isDynamic(s);
-        }))
-      return failure();
-
-    int64_t nElements = 1;
-    for (int64_t s : rankedTensorType.getShape())
-      nElements *= s;
-    Type elementType = rankedTensorType.getElementType();
-    MemRefType memrefType =
-        getTypeConverter()->convertType(op.getType()).cast<MemRefType>();
-    VectorType flatVectorType = VectorType::get({nElements}, elementType);
-    MemRefType memrefOfFlatVectorType = MemRefType::get({}, flatVectorType);
-    MemRefType flatMemrefType = MemRefType::get({nElements}, elementType);
-
-    Location loc = op.getLoc();
-    auto attr = op.getValue().cast<DenseElementsAttr>();
-    Value alloc =
-        rewriter.create<AllocOp>(loc, memrefOfFlatVectorType, ValueRange{});
-    Value cstVec = rewriter.create<ConstantOp>(loc, flatVectorType,
-                                               attr.reshape(flatVectorType));
-    rewriter.create<StoreOp>(loc, cstVec, alloc);
-
-    Value memref =
-        rewriter.create<vector::TypeCastOp>(loc, flatMemrefType, alloc);
-    if (rankedTensorType.getRank() > 1) {
-      // Introduce a linalg.reshape to flatten the memref.
-      AffineMap collapseAllDims = AffineMap::getMultiDimIdentityMap(
-          /*numDims=*/rankedTensorType.getRank(), op.getContext());
-      memref = rewriter.create<linalg::ReshapeOp>(
-          loc, memrefType, memref,
-          rewriter.getAffineMapArrayAttr(collapseAllDims));
-    }
-    rewriter.replaceOp(op, memref);
-
-    return success();
-  }
-};
 } // namespace
 
 namespace {
@@ -391,7 +337,7 @@
   BufferizeTypeConverter typeConverter;
 
   // Mark all Standard operations legal.
-  target.addLegalDialect<StandardOpsDialect, vector::VectorDialect>();
+  target.addLegalDialect<StandardOpsDialect>();
   target.addIllegalOp<InitTensorOp, SubTensorOp, SubTensorInsertOp>();
 
   // Mark all Linalg operations illegal as long as they work on tensors.
@@ -422,8 +368,7 @@
   patterns.insert<
       // clang-format off
       SubTensorOpConverter,
-      SubTensorInsertOpConverter,
-      TensorConstantOpConverter
+      SubTensorInsertOpConverter
      // clang-format on
      >(typeConverter, context);
 }
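For contrast, the TensorConstantOpConverter deleted above materialized every constant at its use site as stack storage for a linearized vector. For the 2x3xf32 constant in bufferize.mlir, it emitted roughly the following (reconstructed from the FileCheck lines removed from that test later in this patch):

    %0 = alloc() : memref<vector<6xf32>>
    %1 = constant dense<[0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00]> : vector<6xf32>
    store %1, %0[] : memref<vector<6xf32>>
    %2 = vector.type_cast %0 : memref<vector<6xf32>> to memref<6xf32>
    %3 = linalg.reshape %2 [#map] : memref<6xf32> into memref<2x3xf32>

Every constant paid an alloc plus a store, identical constants were never shared, and the pattern did not apply to zero-sized or dynamic shapes at all. Dropping it is also what lets vector::VectorDialect disappear from the pass's dependent dialects and legality set.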
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
@@ -4,6 +4,7 @@
   ExpandTanh.cpp
   FuncBufferize.cpp
   FuncConversions.cpp
+  TensorConstantBufferize.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/StandardOps/Transforms
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp b/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp
@@ -0,0 +1,124 @@
+//===- TensorConstantBufferize.cpp - Bufferize tensor constants ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bufferization of tensor-valued std.constant ops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/StandardOps/Transforms/Passes.h"
+#include "mlir/IR/BlockAndValueMapping.h"
+#include "mlir/Transforms/Bufferize.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+using namespace mlir;
+
+namespace {
+// This class creates global ops for all tensor-valued constants in the
+// program. It creates them with pretty names and makes sure that duplicate
+// globals aren't created.
+class GlobalCreator {
+public:
+  explicit GlobalCreator(ModuleOp module);
+  GlobalMemrefOp getGlobalFor(Attribute attr) {
+    assert(globals.find(attr) != globals.end() && "unknown constant attr");
+    return globals[attr];
+  }
+
+private:
+  DenseMap<Attribute, GlobalMemrefOp> globals;
+};
+
+GlobalCreator::GlobalCreator(ModuleOp module) {
+  BufferizeTypeConverter typeConverter;
+  // Create a builder without an insertion point. We will insert using the
+  // symbol table to guarantee unique names.
+  OpBuilder globalBuilder(module.getContext());
+  SymbolTable symbolTable(module);
+  module.walk([&](ConstantOp op) {
+    // We only want tensor constants for now.
+    auto type = op.getType().dyn_cast<RankedTensorType>();
+    if (!type)
+      return;
+    // If we already have a global for this constant value, no need to do
+    // anything else.
+    auto it = globals.find(op.getValue());
+    if (it != globals.end())
+      return;
+
+    // Create a pretty name.
+    SmallString<64> buf;
+    llvm::raw_svector_ostream os(buf);
+    interleave(type.getShape(), os, "x");
+    os << "x" << type.getElementType();
+
+    auto global = globalBuilder.create<GlobalMemrefOp>(
+        op.getLoc(), (Twine("__constant_") + os.str()).str(),
+        /*sym_visibility=*/globalBuilder.getStringAttr("private"),
+        /*type=*/TypeAttr::get(typeConverter.convertType(type)),
+        /*initial_value=*/op.getValue().cast<ElementsAttr>(),
+        /*constant=*/true);
+    symbolTable.insert(global);
+    // The symbol table inserts at the end of the module, but globals are a
+    // bit nicer if they are at the beginning.
+    global.getOperation()->moveBefore(&module.front());
+    globals[op.getValue()] = global;
+  });
+}
+} // namespace
+
+namespace {
+class BufferizeTensorConstantOp : public OpConversionPattern<ConstantOp> {
+public:
+  BufferizeTensorConstantOp(GlobalCreator &globals,
+                            TypeConverter &typeConverter, MLIRContext *context)
+      : OpConversionPattern<ConstantOp>(typeConverter, context, /*benefit=*/1),
+        globals(globals) {}
+
+  LogicalResult
+  matchAndRewrite(ConstantOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto type = op.getType().dyn_cast<RankedTensorType>();
+    if (!type)
+      return failure();
+
+    auto globalMemref = globals.getGlobalFor(op.getValue());
+    rewriter.replaceOpWithNewOp<GetGlobalMemrefOp>(op, globalMemref.type(),
+                                                   globalMemref.getName());
+    return success();
+  }
+  GlobalCreator &globals;
+};
+} // namespace
+
+namespace {
+struct TensorConstantBufferizePass
+    : public TensorConstantBufferizeBase<TensorConstantBufferizePass> {
+  void runOnOperation() override {
+    auto module = getOperation();
+    GlobalCreator globals(module);
+
+    auto *context = &getContext();
+    BufferizeTypeConverter typeConverter;
+    OwningRewritePatternList patterns;
+    ConversionTarget target(*context);
+
+    target.addLegalDialect<StandardOpsDialect>();
+    patterns.insert<BufferizeTensorConstantOp>(globals, typeConverter, context);
+    target.addDynamicallyLegalOp<ConstantOp>(
+        [&](ConstantOp op) { return typeConverter.isLegal(op.getType()); });
+    if (failed(applyPartialConversion(module, target, std::move(patterns))))
+      signalPassFailure();
+  }
+};
+} // namespace
+
+std::unique_ptr<Pass> mlir::createTensorConstantBufferizePass() {
+  return std::make_unique<TensorConstantBufferizePass>();
+}
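The "pretty name" is just the prefix __constant_ followed by the shape dimensions interleaved with x plus the element type, and globals are keyed on the constant's value attribute, so duplicate constants collapse into one global. A sketch of the resulting mapping (the _0 suffix on the third name is illustrative; SymbolTable::insert picks the actual uniqued name):

    %0 = constant dense<7.0> : tensor<3x4xf32>   // -> @__constant_3x4xf32
    %1 = constant dense<7.0> : tensor<3x4xf32>   // -> reuses @__constant_3x4xf32
    %2 = constant dense<8.0> : tensor<3x4xf32>   // -> a second global with the same pretty name,
                                                 //    uniqued by the symbol table, e.g. @__constant_3x4xf32_0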
diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir
--- a/mlir/test/Dialect/Linalg/bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/bufferize.mlir
@@ -94,24 +94,6 @@
 
 // -----
 
-// Check lowering of tensor-valued std.constant's
-// TODO: Move this to std-bufferize.
-
-// CHECK-LABEL: func @constant() -> tensor<2x3xf32> {
-// CHECK: %[[VECTOR_MEMREF:.*]] = alloc() : memref<vector<6xf32>>
-// CHECK: %[[VECTOR_CONST:.*]] = constant dense<[0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00]> : vector<6xf32>
-// CHECK: store %[[VECTOR_CONST]], %[[VECTOR_MEMREF]][] : memref<vector<6xf32>>
-// CHECK: %[[MEMREF:.*]] = vector.type_cast %[[VECTOR_MEMREF]] : memref<vector<6xf32>> to memref<6xf32>
-// CHECK: %[[FINAL_SHAPE:.*]] = linalg.reshape %[[MEMREF]] [#map] : memref<6xf32> into memref<2x3xf32>
-// CHECK: %[[RESULT:.*]] = tensor_load %[[FINAL_SHAPE]] : memref<2x3xf32>
-// CHECK: return %[[RESULT]] : tensor<2x3xf32>
-func @constant() -> tensor<2x3xf32> {
-  %0 = constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32>
-  return %0: tensor<2x3xf32>
-}
-
-// -----
-
 #accesses = [
   affine_map<(i, j, k) -> (j, i, k)>,
   affine_map<(i, j, k) -> (i, j)>
diff --git a/mlir/test/Dialect/Standard/tensor-constant-bufferize.mlir b/mlir/test/Dialect/Standard/tensor-constant-bufferize.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/Standard/tensor-constant-bufferize.mlir
@@ -0,0 +1,59 @@
+// RUN: mlir-opt %s -tensor-constant-bufferize -split-input-file | FileCheck %s
+
+// CHECK-LABEL: module {
+// We check the debug name too since we put some effort into making that readable.
+// The name isn't load-bearing though.
+// CHECK: global_memref "private" constant @__constant_3x4xf32 : memref<3x4xf32> = dense<7.000000e+00>
+// CHECK: @basic
+func @basic() -> tensor<3x4xf32> {
+  // CHECK: %[[MEMREF:.*]] = get_global_memref @__constant_3x4xf32 : memref<3x4xf32>
+  // CHECK: %[[TENSOR:.*]] = tensor_load %[[MEMREF]]
+  %0 = constant dense<7.0> : tensor<3x4xf32>
+  // CHECK: return %[[TENSOR]]
+  return %0 : tensor<3x4xf32>
+}
+
+// CHECK: }
+
+// -----
+
+// CHECK-LABEL: module {
+
+// Only one global is created.
+// CHECK: global_memref
+// CHECK-NOT: global_memref
+func @duplicate_constants() -> (tensor<3x4xf32>, tensor<3x4xf32>) {
+  %0 = constant dense<7.0> : tensor<3x4xf32>
+  %1 = constant dense<7.0> : tensor<3x4xf32>
+  return %0, %1 : tensor<3x4xf32>, tensor<3x4xf32>
+}
+
+// CHECK: }
+
+// -----
+
+// CHECK-LABEL: module {
+
+// Two globals are created.
+// CHECK: global_memref
+// CHECK: global_memref
+// CHECK-NOT: global_memref
+func @multiple_constants() -> (tensor<3x4xf32>, tensor<3x4xf32>) {
+  %0 = constant dense<7.0> : tensor<3x4xf32>
+  %1 = constant dense<8.0> : tensor<3x4xf32>
+  return %0, %1 : tensor<3x4xf32>, tensor<3x4xf32>
+}
+
+// CHECK: }
+
+// -----
+
+// CHECK-LABEL: module {
+// We don't convert non-tensor constants.
+// CHECK-NOT: global_memref
+func @non_tensor() {
+  %0 = constant 7 : i32
+  return
+}
+
+// CHECK: }