diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -20,6 +20,7 @@
 #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
 #include "mlir/Dialect/X86Vector/Transforms.h"
 #include "mlir/IR/PatternMatch.h"
+#include "mlir/Parser/Parser.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallSet.h"
@@ -553,69 +554,32 @@
                        SmallVectorImpl<Value> &ivs,
                        const LoopIndexToRangeIndexMap &loopIndexToRangeIndex);
 
-/// Callback returning the padding value to use for a given OpOperand or failure
-/// for no padding. This should be a function of both the operation and the
-/// operand type.
-using PaddingValueComputationFunction =
-    std::function<FailureOr<Value>(OpBuilder &, OpOperand &)>;
-
-/// Callback returning true if the PadOp defining the given OpOperand shall be
-/// marked as nofold to enable packing.
-using PaddingNoFoldComputationFunction = std::function<bool(OpOperand &)>;
-
-/// Callback returning the number of loops to hoist the PadOp defining the given
-/// OpOperand.
-using PaddingHoistComputationFunction = std::function<int64_t(OpOperand &)>;
-
-/// Callback returning the transpose vector used to permute the result tensor
-/// dimensions of the PadOp defining the given OpOperand.
-using PaddingTransposeComputationFunction =
-    std::function<SmallVector<int64_t>(OpOperand &)>;
-
 struct LinalgPaddingOptions {
-  /// Callback returning the padding value to use for a given OpOperand or
-  /// failure for no padding. Padding operations are introduced if
-  /// `paddingValueComputationFunction` is set and does not return failure.
-  /// Padding all operands guarantees the operation is statically shaped and
-  /// thus can be vectorized.
-  PaddingValueComputationFunction paddingValueComputationFunction = nullptr;
-
-  LinalgPaddingOptions &
-  setPaddingValueComputationFunction(PaddingValueComputationFunction fun) {
-    paddingValueComputationFunction = std::move(fun);
+  /// A padding value for every operand parsed using the attribute parser.
+  SmallVector<std::string> paddingValues;
+  LinalgPaddingOptions &setPaddingValues(ArrayRef<std::string> pv) {
+    paddingValues.assign(pv.begin(), pv.end());
     return *this;
   }
-
-  /// Callback returning true if the PadOp defining the given OpOperand shall be
-  /// marked as nofold to enable packing. A padding operation is only marked
-  /// nofold if `paddingNoFoldComputationFunction` is set and returns true.
-  /// Otherwise, the nofold attribute is set to false.
-  PaddingNoFoldComputationFunction paddingNoFoldComputationFunction = nullptr;
-
-  LinalgPaddingOptions &
-  setPaddingNoFoldComputationFunction(PaddingNoFoldComputationFunction fun) {
-    paddingNoFoldComputationFunction = std::move(fun);
+  /// A flag for every operand to mark the PadOp as nofold to enable packing for
+  /// statically shaped operands.
+  SmallVector<bool> packPaddings;
+  LinalgPaddingOptions &setPackPaddings(ArrayRef<bool> pp) {
+    packPaddings.assign(pp.begin(), pp.end());
    return *this;
   }
-
-  /// Callback returning the number of loops to hoist the PadOp defining the
-  /// given OpOperand.
-  PaddingHoistComputationFunction paddingHoistComputationFunction = nullptr;
-
-  LinalgPaddingOptions &
-  setPaddingHoistComputationFunction(PaddingHoistComputationFunction fun) {
-    paddingHoistComputationFunction = std::move(fun);
+  /// A number of loops to hoist the PadOp out for every operand.
+  SmallVector<int64_t> hoistPaddings;
+  LinalgPaddingOptions &setHoistPaddings(ArrayRef<int64_t> hp) {
+    hoistPaddings.assign(hp.begin(), hp.end());
     return *this;
   }
-
-  /// Callback returning the transpose vector used to permute the result tensor
-  /// dimensions of the PadOp defining the given OpOperand.
-  PaddingTransposeComputationFunction paddingTransposeComputationFunction =
-      nullptr;
-
-  LinalgPaddingOptions &setPaddingTransposeComputationFunction(
-      PaddingTransposeComputationFunction fun) {
-    paddingTransposeComputationFunction = std::move(fun);
+  /// A permutation vector for every operand used to transpose the packed PadOp
+  /// results.
+  SmallVector<SmallVector<int64_t>> transposePaddings;
+  LinalgPaddingOptions &
+  setTransposePaddings(ArrayRef<SmallVector<int64_t>> tp) {
+    transposePaddings.assign(tp.begin(), tp.end());
     return *this;
   }
 };
@@ -1254,16 +1218,15 @@
                                 PatternRewriter &rewriter) const override;
 };
 
-/// Pad the operands of `opToPad` to a static bounding box. Use `paddingFunc`
-/// and `nofoldFunc` to set the padding value and the nofold attribute of the
+/// Pad the operands of `opToPad` to a static bounding box. Use `paddingValues`
+/// and `packPaddings` to set the padding value and the nofold attribute of the
 /// introduced tensor::PadOps, respectively. Update `paddedOp` to the cloned
 /// statically shaped operation and return the extracted dynamically shaped
 /// results. If padding fails, return failure.
 FailureOr<SmallVector<Value>>
 rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
-                  const PaddingValueComputationFunction &paddingFunc,
-                  const PaddingNoFoldComputationFunction &nofoldFunc,
-                  LinalgOp &paddedOp);
+                  ArrayRef<std::string> paddingValues,
+                  ArrayRef<bool> packPaddings, LinalgOp &paddedOp);
 
 using OptimizeCopyFn =
     std::function<LogicalResult(PatternRewriter &, tensor::PadOp, Value)>;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -51,6 +51,7 @@
   MLIRSCF
   MLIRSCFTransforms
   MLIRSCFUtils
+  MLIRParser
   MLIRPass
   MLIRSparseTensor
   MLIRTensor
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -158,16 +158,17 @@
   return *this;
 }
 
-/// Helper function that tries to pad `opOperand`. Exit early for scalar
-/// operands, if `paddingFunc` returns failure, or if `opOperand` is not defined
-/// by an ExtractSliceOp. Otherwise, try to pad the operand even if it already
-/// has a static shape. Set `result` to the result of the created tensor::PadOp
-/// or and return success if the operand either has been padded to a static
-/// shape or already had a static shape and failure otherwise.
+/// Pad `opOperand` using the provided `paddingValues`. Exit early for scalar
+/// operands, if `paddingValues` contains no value for the `opOperand`, or if
+/// `opOperand` is not defined by an ExtractSliceOp. Otherwise, try to pad the
+/// operand even if it already has a static shape. Set `result` to the result
+/// of the created tensor::PadOp and return success if the operand either has
+/// been padded to a static shape or already had a static shape and failure
+/// otherwise.
 static LogicalResult padOperandToSmallestStaticBoundingBox(
     OpBuilder &b, linalg::LinalgOp opToPad, OpOperand *opOperand,
-    const PaddingValueComputationFunction &paddingFunc,
-    const PaddingNoFoldComputationFunction &nofoldFunc, Value &result) {
+    ArrayRef<std::string> paddingValues, ArrayRef<bool> packPaddings,
+    Value &result) {
   // Get the shape of the operand and check if it has a dynamic shape. Only
   // return failure if the operand is not a scalar and has a dynamic shape.
   ArrayRef<int64_t> shape = opToPad.getShape(opOperand);
@@ -178,9 +179,13 @@
     return success();
 
   // Cannot pad if the padding value is unknown.
-  FailureOr<Value> paddingValue = paddingFunc(b, *opOperand);
-  if (failed(paddingValue))
+  if (opOperand->getOperandNumber() >= paddingValues.size())
     return failure(hasDynamicShape);
+  Attribute paddingValueAttr =
+      parseAttribute(paddingValues[opOperand->getOperandNumber()],
+                     getElementTypeOrSelf(opOperand->get().getType()));
+  Value paddingValue = b.create<arith::ConstantOp>(
+      opToPad.getLoc(), paddingValueAttr.getType(), paddingValueAttr);
 
   // Cannot construct a static bounding box if the operand is not defined by an
   // ExtractSliceOp.
@@ -220,18 +225,18 @@
   // Pad the operand to the bounding box defined by `staticSizes`.
   auto staticTensorType = RankedTensorType::get(
       staticSizes, getElementTypeOrSelf(opOperand->get()));
-  bool nofold = nofoldFunc ? nofoldFunc(*opOperand) : false;
-  result =
-      makeComposedPadHighOp(b, opToPad->getLoc(), staticTensorType,
-                            opOperand->get(), paddingValue.getValue(), nofold);
+  bool nofold = opOperand->getOperandNumber() < packPaddings.size()
+                    ? packPaddings[opOperand->getOperandNumber()]
+                    : false;
+  result = makeComposedPadHighOp(b, opToPad->getLoc(), staticTensorType,
+                                 opOperand->get(), paddingValue, nofold);
   return success();
 }
 
 FailureOr<SmallVector<Value>>
 linalg::rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
-                          const PaddingValueComputationFunction &paddingFunc,
-                          const PaddingNoFoldComputationFunction &nofoldFunc,
-                          LinalgOp &paddedOp) {
+                          ArrayRef<std::string> paddingValues,
+                          ArrayRef<bool> packPaddings, LinalgOp &paddedOp) {
   Location loc = opToPad->getLoc();
 
   // TODO: there are cases where we may still want to pad to larger sizes.
@@ -249,7 +254,7 @@
     // If padding was requested but the shape cannot be bounded statically then
     // the pattern fails to apply.
     if (failed(padOperandToSmallestStaticBoundingBox(
-            b, opToPad, opOperand, paddingFunc, nofoldFunc, paddedOperand)))
+            b, opToPad, opOperand, paddingValues, packPaddings, paddedOperand)))
       return failure();
     newOperands.push_back(paddedOperand ? paddedOperand : opOperand->get());
   }
@@ -491,21 +496,16 @@
   // Pad the operation.
   LinalgOp paddedOp;
-  FailureOr<SmallVector<Value>> newResults = rewriteAsPaddedOp(
-      rewriter, linalgOp, options.paddingValueComputationFunction,
-      options.paddingNoFoldComputationFunction, paddedOp);
+  FailureOr<SmallVector<Value>> newResults =
+      rewriteAsPaddedOp(rewriter, linalgOp, options.paddingValues,
+                        options.packPaddings, paddedOp);
   if (failed(newResults))
     return failure();
 
-  // Compute the desired hoisting depths.
-  SmallVector<int64_t> depths;
-  if (options.paddingHoistComputationFunction) {
-    for (OpOperand *opOperand : linalgOp.getInputAndOutputOperands())
-      depths.push_back(options.paddingHoistComputationFunction(*opOperand));
-  }
-
-  // Hoist the padding.
-  for (const auto &en : enumerate(depths)) {
+  for (const auto &en : enumerate(options.hoistPaddings)) {
+    if (static_cast<int64_t>(en.index()) >= paddedOp.getNumInputsAndOutputs())
+      break;
     OpOperand &opOperand = paddedOp->getOpOperand(en.index());
     auto padOp = opOperand.get().getDefiningOp<tensor::PadOp>();
     if (!padOp || en.value() == 0)
@@ -513,7 +513,9 @@
     tensor::PadOp hoistedOp;
     SmallVector<GenericOp> transposeOps;
     SmallVector<int64_t> transposeVector =
-        options.paddingTransposeComputationFunction(opOperand);
+        en.index() < options.transposePaddings.size()
+            ? options.transposePaddings[en.index()]
+            : SmallVector<int64_t>{};
 
     FailureOr<Value> newResult = hoistPaddingOnTensors(
         padOp, en.value(), transposeVector, hoistedOp, transposeOps);
diff --git a/mlir/test/Dialect/Linalg/codegen-strategy.mlir b/mlir/test/Dialect/Linalg/codegen-strategy.mlir
--- a/mlir/test/Dialect/Linalg/codegen-strategy.mlir
+++ b/mlir/test/Dialect/Linalg/codegen-strategy.mlir
@@ -1,13 +1,13 @@
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=2,4,8 vectorize vectorize-contraction-to=matrixintrinsics unroll-vector-transfers=true" -split-input-file | FileCheck %s --check-prefix=CHECK-INTRINSIC
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 promote promote-full-tile-pad register-tile-sizes=2,4,8 vectorize vectorize-contraction-to=outerproduct split-transfers=true unroll-vector-transfers=false" -split-input-file | FileCheck %s --check-prefix=CHECK-OUTER
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 tile-interchange=1,2,0 generalize iterator-interchange=0,2,1" -split-input-file | FileCheck %s --check-prefix=CHECK-INTERCHANGE
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 pad pack-paddings=1,1,0 hoist-paddings=3,3,0" -split-input-file | FileCheck %s --check-prefix=CHECK-PAD
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 fuse pad vectorize" -split-input-file | FileCheck %s --check-prefix=CHECK-FUSE
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=conv anchor-op=linalg.conv_2d_nhwc_hwcf tile-sizes=1,1,8,32,1,1,8 fuse pad decompose vectorize vectorize-padding" -split-input-file | FileCheck %s --check-prefix=CHECK-DECOMP
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 pad padding-values=0.0,0.0,0.0 pack-paddings=1,1,0 hoist-paddings=3,3,0" -split-input-file | FileCheck %s --check-prefix=CHECK-PAD
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul tile-sizes=16,32,64 fuse pad padding-values=0.0,0.0,0.0 vectorize" -split-input-file | FileCheck %s --check-prefix=CHECK-FUSE
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-func=conv anchor-op=linalg.conv_2d_nhwc_hwcf tile-sizes=1,1,8,32,1,1,8 fuse pad padding-values=0.0,0.0,0.0 decompose vectorize vectorize-padding" -split-input-file | FileCheck %s --check-prefix=CHECK-DECOMP
 
 // CHECK-INTRINSIC: func @matmul(
 // CHECK-OUTER: func @matmul(
-func @matmul(%arg0: memref<72x72xf32>, %arg1: memref<72x72xf32>, %arg2: memref<72x72xf32>) {
+func.func @matmul(%arg0: memref<72x72xf32>, %arg1: memref<72x72xf32>, %arg2: memref<72x72xf32>) {
 
   // Check the matrix intrinsic lowering is triggered.
   // CHECK-INTRINSIC: vector.matrix_multiply
@@ -17,13 +17,13 @@
   // Check the outer product lowering is triggered.
   // CHECK-OUTER: vector.outerproduct {{.*}} : vector<2xf32>, vector<4xf32>
   linalg.matmul ins(%arg0, %arg1: memref<72x72xf32>, memref<72x72xf32>) outs(%arg2: memref<72x72xf32>)
-  return
+  func.return
 }
 
 // -----
 
 // CHECK-INTERCHANGE: func @matmul(
-func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
+func.func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
   // CHECK-INTERCHANGE-DAG: %[[C16:.*]] = arith.constant 16
   // CHECK-INTERCHANGE-DAG: %[[C32:.*]] = arith.constant 32
   // CHECK-INTERCHANGE-DAG: %[[C64:.*]] = arith.constant 64
@@ -37,7 +37,7 @@
   // CHECK-INTERCHANGE: linalg.generic
   // CHECK-INTERCHANGE-SAME: iterator_types = ["parallel", "reduction", "parallel"]
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<72x72xf32>, tensor<72x72xf32>) outs(%arg2: tensor<72x72xf32>) -> tensor<72x72xf32>
-  return %0 : tensor<72x72xf32>
+  func.return %0 : tensor<72x72xf32>
 }
 
 // -----
 
@@ -45,7 +45,7 @@
 // CHECK-PAD-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (16, -d0 + 72)>
 // CHECK-PAD: func @matmul(
-func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
+func.func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
 
   // Check the padding of the input operands has been hoisted out of the tile loop nest.
   // CHECK-PAD-COUNT=2: tensor.pad %{{.*}} nofold
@@ -56,13 +56,13 @@
   // CHECK-PAD-COUNT=2: scf.for
   // CHECK-PAD: linalg.matmul
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<72x72xf32>, tensor<72x72xf32>) outs(%arg2: tensor<72x72xf32>) -> tensor<72x72xf32>
-  return %0 : tensor<72x72xf32>
+  func.return %0 : tensor<72x72xf32>
 }
 
 // -----
 
 // CHECK-FUSE: func @matmul(
-func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
+func.func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
 
   // Check the padding and vectorization applies to the fill operation due to the empty anchor op string.
   // CHECK-FUSE: %[[CST:.*]] = arith.constant dense<0.000000e+00>
@@ -73,13 +73,13 @@
   // Check the matmul is padded and vectorized despite the empty anchor op string.
   // CHECK-FUSE: vector.outerproduct
   %1 = linalg.matmul ins(%arg0, %arg1: tensor<72x72xf32>, tensor<72x72xf32>) outs(%0: tensor<72x72xf32>) -> tensor<72x72xf32>
-  return %1 : tensor<72x72xf32>
+  func.return %1 : tensor<72x72xf32>
 }
 
 // -----
 
 // CHECK-DECOMP: func @conv(
-func @conv(%arg0: tensor<8x18x17x32xf32>, %arg1: tensor<3x3x32x64xf32>, %arg2: tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32> {
+func.func @conv(%arg0: tensor<8x18x17x32xf32>, %arg1: tensor<3x3x32x64xf32>, %arg2: tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32> {
   %cst = arith.constant 0.000000e+00 : f32
   %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32>
@@ -88,5 +88,5 @@
   // CHECK-DECOMP: vector.outerproduct
   // CHECK-DECOMP: vector.transfer_write {{.*}}: vector<1x8x32xf32>, tensor<1x1x?x32xf32>
   %1 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<8x18x17x32xf32>, tensor<3x3x32x64xf32>) outs(%0 : tensor<8x16x15x64xf32>) -> tensor<8x16x15x64xf32>
-  return %1 : tensor<8x16x15x64xf32>
+  func.return %1 : tensor<8x16x15x64xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=MATMUL
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=FILL
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 pad-inputs-only run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.0,0.0,0.0 pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=MATMUL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.0,0.0 pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=FILL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.0,0.0 pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
 
 // MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (7, -s0 + 12)>
 // MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
@@ -13,10 +13,10 @@
 // MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
-func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
-                                    %arg1: tensor<12x25xf32>,
-                                    %arg2: tensor<24x25xf32>,
-                                    %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+func.func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
+                                         %arg1: tensor<12x25xf32>,
+                                         %arg2: tensor<24x25xf32>,
+                                         %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
   // MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
 
   // MATMUL: %[[TS2:.*]] = affine.min #[[MAP0]]()[%[[IV2]]]
@@ -43,7 +43,7 @@
   // MATMUL: %[[T6:.*]] = tensor.insert_slice %[[T5]]
   %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
-  return %5 : tensor<24x25xf32>
+  func.return %5 : tensor<24x25xf32>
 }
 
 // -----
 
@@ -57,10 +57,10 @@
 // MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
-func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
-                                   %arg1: tensor<12x25xf32>,
-                                   %arg2: tensor<24x25xf32>,
-                                   %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+func.func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
+                                        %arg1: tensor<12x25xf32>,
+                                        %arg2: tensor<24x25xf32>,
+                                        %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
   // MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
 
   %3 = tensor.extract_slice %arg0[%iv0, %iv2] [4, 6] [1, 1] : tensor<24x12xf32> to tensor<4x6xf32>
@@ -85,8 +85,8 @@
   %7 = linalg.matmul ins(%3, %5 : tensor<4x6xf32>, tensor<6x?xf32>) outs(%6 : tensor<4x?xf32>) -> tensor<4x?xf32>
   %8 = tensor.insert_slice %7 into %arg2[%iv0, %iv1] [4, %4] [1, 1] : tensor<4x?xf32> into tensor<24x25xf32>
 
-  // MATMUL: return %[[T4]]
-  return %8 : tensor<24x25xf32>
+  // MATMUL: return %[[T4]]
+  func.return %8 : tensor<24x25xf32>
 }
 
 // -----
 
@@ -108,10 +108,10 @@
 // MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
-func @dynamic_sizes(%arg0: tensor<?x?xf32>,
-                    %arg1: tensor<?x?xf32>,
-                    %arg2: tensor<?x?xf32>,
-                    %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<?x?xf32> {
+func.func @dynamic_sizes(%arg0: tensor<?x?xf32>,
+                         %arg1: tensor<?x?xf32>,
+                         %arg2: tensor<?x?xf32>,
+                         %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<?x?xf32> {
   // MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
   // MATMUL-DAG: %[[C1:.*]] = arith.constant 1
   %c1 = arith.constant 1 : index
@@ -156,7 +156,7 @@
   %13 = tensor.insert_slice %12 into %arg2[%iv0, %iv1] [%6, %9] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
 
   // MATMUL: return %[[T8]]
-  return %13 : tensor<?x?xf32>
+  func.return %13 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -165,8 +165,8 @@
 // FILL: pad_multiple
 // FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
-func @pad_multiple(%arg0: tensor<64x64xf32>,
-                   %iv0 : index) -> tensor<?x?xf32> {
+func.func @pad_multiple(%arg0: tensor<64x64xf32>,
+                        %iv0 : index) -> tensor<?x?xf32> {
   %cst = arith.constant 0.0 : f32
   %size = affine.min #map0()[%iv0]
   %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
@@ -178,7 +178,7 @@
   // FILL: = tensor.extract_slice %[[T2]]
   %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
   %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %2 : tensor<?x?xf32>
+  func.return %2 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -187,8 +187,8 @@
 // MATMUL: compose_padding
 // MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
-func @compose_padding(%arg0: tensor<64x64xf32>,
-                      %iv0 : index) -> tensor<?x?xf32> {
+func.func @compose_padding(%arg0: tensor<64x64xf32>,
+                           %iv0 : index) -> tensor<?x?xf32> {
   %cst = arith.constant 0.0 : f32
 
   // MATMUL: %[[SIZE:.*]] = affine.min
@@ -220,7 +220,7 @@
   %5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
 
   // MATMUL: return %[[T5]]
-  return %5 : tensor<?x?xf32>
+  func.return %5 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -228,8 +228,8 @@
 #map0 = affine_map<()[s0] -> (64, s0)>
 
 // MATMUL: different_padding_values
-func @different_padding_values(%arg0: tensor<64x64xf32>,
-                               %iv0 : index) -> tensor<?x?xf32> {
+func.func @different_padding_values(%arg0: tensor<64x64xf32>,
+                                    %iv0 : index) -> tensor<?x?xf32> {
   %cst = arith.constant 42.0 : f32
   %size = affine.min #map0()[%iv0]
   %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
@@ -245,7 +245,7 @@
   // MATMUL: = tensor.pad
   // MATMUL: = linalg.matmul
   %5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %5 : tensor<?x?xf32>
+  func.return %5 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -253,8 +253,8 @@
 #map0 = affine_map<()[s0] -> (64, s0)>
 
 // MATMUL: different_padding_dynamic_sizes
-func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
-                                      %iv0 : index) -> tensor<?x?xf32> {
+func.func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
+                                           %iv0 : index) -> tensor<?x?xf32> {
   %cst = arith.constant 0.0 : f32
   %size = affine.min #map0()[%iv0]
   %0 = tensor.extract_slice %arg0[0, 0] [%iv0, %iv0] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
@@ -270,7 +270,7 @@
   // MATMUL: = tensor.pad
   // MATMUL: = linalg.matmul
   %5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %5 : tensor<?x?xf32>
+  func.return %5 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -278,8 +278,8 @@
 #map0 = affine_map<()[s0] -> (64, s0)>
 
 // MATMUL: different_padding_dynamic_rank
-func @different_padding_dynamic_rank(%arg0: tensor<64x64x1xf32>,
-                                     %iv0 : index) -> tensor<?x?xf32> {
+func.func @different_padding_dynamic_rank(%arg0: tensor<64x64x1xf32>,
+                                          %iv0 : index) -> tensor<?x?xf32> {
  %cst = arith.constant 0.0 : f32
  %size = affine.min #map0()[%iv0]
  %0 = tensor.extract_slice %arg0[0, 0, 0] [%size, %size, 1] [1, 1, 1] : tensor<64x64x1xf32> to tensor<?x?xf32>
@@ -295,7 +295,7 @@
  // MATMUL: = tensor.pad
  // MATMUL: = linalg.matmul
  %4 = linalg.matmul ins(%3, %3 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%3 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %4 : tensor<?x?xf32>
+  func.return %4 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -303,8 +303,8 @@
 #map0 = affine_map<()[s0] -> (64, s0)>
 
 // MATMUL: different_padding_static_sizes
-func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
-                                     %iv0 : index) -> tensor<?x?xf32> {
+func.func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
+                                          %iv0 : index) -> tensor<?x?xf32> {
  %cst = arith.constant 0.0 : f32
  %size = affine.min #map0()[%iv0]
  %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor<?x?xf32>
@@ -320,7 +320,7 @@
  // MATMUL: = tensor.pad
  // MATMUL: = linalg.matmul
  %5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %5 : tensor<?x?xf32>
+  func.return %5 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -330,9 +330,9 @@
 // FILL: scalar_operand
 // FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: f32
 // FILL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<24x12xf32>
-func @scalar_operand(%arg0: f32,
-                     %arg1: tensor<24x12xf32>,
-                     %iv0 : index) -> tensor<24x12xf32> {
+func.func @scalar_operand(%arg0: f32,
+                          %arg1: tensor<24x12xf32>,
+                          %iv0 : index) -> tensor<24x12xf32> {
  %0 = affine.min #map0()[%iv0]
 
  // FILL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
@@ -343,7 +343,7 @@
  // FILL: %[[T6:.*]] = linalg.fill ins(%[[ARG0]]{{.*}}outs(%[[T1]]
  %2 = linalg.fill ins(%arg0 : f32) outs(%1 : tensor<4x?xf32>) -> tensor<4x?xf32>
  %3 = tensor.insert_slice %2 into %arg1[0, 0] [4, %0] [1, 1] : tensor<4x?xf32> into tensor<24x12xf32>
-  return %3 : tensor<24x12xf32>
+  func.return %3 : tensor<24x12xf32>
 }
 
 // -----
 
@@ -352,10 +352,10 @@
 // MATMUL: static_extract_slice_missing
 // MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<4x5xf32>,
-func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
-                                   %arg1: tensor<12x25xf32>,
-                                   %arg2: tensor<4x5xf32>,
-                                   %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<4x5xf32> {
+func.func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
+                                        %arg1: tensor<12x25xf32>,
+                                        %arg2: tensor<4x5xf32>,
+                                        %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<4x5xf32> {
  %0 = affine.min #map0()[%iv2]
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
@@ -366,7 +366,7 @@
  // MATMUL: = linalg.matmul ins(%[[T0]], %[[T1]]
  // MATMUL-SAME: outs(%[[ARG2]]
  %3 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%arg2 : tensor<4x5xf32>) -> tensor<4x5xf32>
-  return %3 : tensor<4x5xf32>
+  func.return %3 : tensor<4x5xf32>
 }
 
 // -----
 
@@ -377,10 +377,10 @@
 // MATMUL: dynamic_extract_slice_missing
 // MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<4x?xf32>,
 // MATMUL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>,
 // MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>,
-func @dynamic_extract_slice_missing(%arg0: tensor<4x?xf32>,
-                                    %arg1: tensor<12x25xf32>,
-                                    %arg2: tensor<24x25xf32>,
-                                    %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+func.func @dynamic_extract_slice_missing(%arg0: tensor<4x?xf32>,
+                                         %arg1: tensor<12x25xf32>,
+                                         %arg2: tensor<24x25xf32>,
+                                         %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map0()[%iv2]
 
  // MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
@@ -393,7 +393,7 @@
  // MATMUL-SAME: outs(%[[T1]]
  %4 = linalg.matmul ins(%arg0, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
-  return %5 : tensor<24x25xf32>
+  func.return %5 : tensor<24x25xf32>
 }
 
 // -----
 
@@ -402,10 +402,10 @@
 // INPUTS-ONLY: static_input_padding_only
 // INPUTS-ONLY-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>,
-func @static_input_padding_only(%arg0: tensor<24x12xf32>,
-                                %arg1: tensor<12x25xf32>,
-                                %arg2: tensor<24x25xf32>,
-                                %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+func.func @static_input_padding_only(%arg0: tensor<24x12xf32>,
+                                     %arg1: tensor<12x25xf32>,
+                                     %arg2: tensor<24x25xf32>,
+                                     %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map0()[%iv2]
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
@@ -420,7 +420,7 @@
  // INPUTS-ONLY-SAME: outs(%[[T0]]
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
-  return %5 : tensor<24x25xf32>
+  func.return %5 : tensor<24x25xf32>
 }
 
 // -----
 
@@ -431,10 +431,10 @@
 // INPUTS-ONLY: dynamic_input_padding_only
 // INPUTS-ONLY-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>,
 // INPUTS-ONLY-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>,
 // INPUTS-ONLY-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>,
-func @dynamic_input_padding_only(%arg0: tensor<24x12xf32>,
-                                 %arg1: tensor<12x25xf32>,
-                                 %arg2: tensor<24x25xf32>,
-                                 %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+func.func @dynamic_input_padding_only(%arg0: tensor<24x12xf32>,
+                                      %arg1: tensor<12x25xf32>,
+                                      %arg2: tensor<24x25xf32>,
+                                      %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map0()[%iv2]
 
  // INPUTS-ONLY: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
@@ -449,7 +449,7 @@
  // INPUTS-ONLY-SAME: outs(%[[T2]]
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x?xf32>) outs(%3 : tensor<4x?xf32>) -> tensor<4x?xf32>
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, %0] [1, 1] : tensor<4x?xf32> into tensor<24x25xf32>
-  return %5 : tensor<24x25xf32>
+  func.return %5 : tensor<24x25xf32>
 }
 
 // -----
 
@@ -458,8 +458,8 @@
 // FILL: rank_reducing
 // FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<1x64x1x64xf32>
-func @rank_reducing(%arg0: tensor<1x64x1x64xf32>,
-                    %iv0 : index) -> tensor<1x?x?xf32> {
+func.func @rank_reducing(%arg0: tensor<1x64x1x64xf32>,
+                         %iv0 : index) -> tensor<1x?x?xf32> {
  %cst = arith.constant 0.0 : f32
  %size = affine.min #map0()[%iv0]
  %0 = tensor.extract_slice %arg0[0, 0, 0, 0] [1, %size, 1, %size] [1, 1, 1, 1] : tensor<1x64x1x64xf32> to tensor<1x?x?xf32>
@@ -470,5 +470,5 @@
  // FILL-SAME: tensor<1x64x64xf32>
  // FILL: = tensor.extract_slice %[[T1]]
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
-  return %1 : tensor<1x?x?xf32>
+  func.return %1 : tensor<1x?x?xf32>
 }
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
@@ -96,23 +96,21 @@
                    llvm::cl::init(false)};
   Option<bool> pad{*this, "pad", llvm::cl::desc("Pad the operands."),
                    llvm::cl::init(false)};
-  Option<bool> padInputsOnly{
-      *this, "pad-inputs-only",
-      llvm::cl::desc("Only pad input operands when test-pad-pattern"),
-      llvm::cl::init(false)};
+  ListOption<std::string> paddingValues{
+      *this, "padding-values",
+      llvm::cl::desc("Operand padding values parsed by the attribute parser."),
+      llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
   ListOption<bool> packPaddings{
-      *this, "pack-paddings",
-      llvm::cl::desc("Operand packing flags when test-pad-pattern."),
+      *this, "pack-paddings", llvm::cl::desc("Operand packing flags."),
      llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
   ListOption<int64_t> hoistPaddings{
-      *this, "hoist-paddings",
-      llvm::cl::desc("Operand hoisting depths when test-pad-pattern."),
+      *this, "hoist-paddings", llvm::cl::desc("Operand hoisting depths."),
      llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
   ListOption<std::string> transposePaddings{
      *this, "transpose-paddings",
      llvm::cl::desc(
-          "Transpose paddings when test-pad-pattern. Specify a "
-          "operand dimension interchange using the following format:\n"
+          "Transpose paddings. Specify an operand dimension interchange "
+          "using the following format:\n"
          "-transpose-paddings=1:0:2,0:1,0:1\n"
          "It defines the interchange [1, 0, 2] for operand one and "
          "the interchange [0, 1] (no transpose) for the remaining operands."
@@ -225,14 +223,6 @@
   }
 } // namespace
 
-// For now, just assume it is the zero of type.
-// In the future, it should be the zero of type + op.
-static Value getNeutralOfLinalgOp(OpBuilder &b, OpOperand &op) {
-  auto t = getElementTypeOrSelf(op.get());
-  return b.create<arith::ConstantOp>(op.getOwner()->getLoc(), t,
-                                     b.getZeroAttr(t));
-}
-
 /// Apply transformations specified as patterns.
 void TestLinalgCodegenStrategy::runOnOperation() {
   if (!anchorFuncOpName.empty() && anchorFuncOpName != getOperation().getName())
     return;
@@ -255,44 +245,26 @@
   registerTilingOptions =
       registerTilingOptions.setTileSizes(registerTileSizes);
 
-  LinalgPaddingOptions paddingOptions;
-  auto packFunc = [&](OpOperand &opOperand) {
-    return opOperand.getOperandNumber() < packPaddings.size()
-               ? packPaddings[opOperand.getOperandNumber()]
-               : false;
-  };
-  auto hoistingFunc = [&](OpOperand &opOperand) {
-    return opOperand.getOperandNumber() < hoistPaddings.size()
-               ? hoistPaddings[opOperand.getOperandNumber()]
-               : 0;
-  };
-  auto transposeFunc = [&](OpOperand &opOperand) {
+  // Parse the transpose vectors.
+  SmallVector<SmallVector<int64_t>> transposePaddingVectors;
+  for (const std::string &transposePadding : transposePaddings) {
     SmallVector<int64_t> transposeVector = {};
-    if (opOperand.getOperandNumber() >= transposePaddings.size())
-      return transposeVector;
-    SmallVector<StringRef> elems;
-    StringRef(transposePaddings[opOperand.getOperandNumber()])
-        .split(elems, ':');
-    for (StringRef elem : elems)
-      transposeVector.push_back(std::stoi(elem.str()));
-    return transposeVector;
-  };
-  paddingOptions.setPaddingValueComputationFunction(getNeutralOfLinalgOp);
-  paddingOptions.setPaddingNoFoldComputationFunction(packFunc);
-  paddingOptions.setPaddingHoistComputationFunction(hoistingFunc);
-  paddingOptions.setPaddingTransposeComputationFunction(transposeFunc);
-
-  // Compute input padding values only an return failure for output operands.
-  if (padInputsOnly) {
-    paddingOptions.setPaddingValueComputationFunction(
-        [](OpBuilder &b, OpOperand &op) -> FailureOr<Value> {
-          auto linalgOp = dyn_cast<LinalgOp>(op.getOwner());
-          if (linalgOp && linalgOp.isInputTensor(&op))
-            return getNeutralOfLinalgOp(b, op);
-          return failure();
-        });
+    SmallVector<StringRef> tokens;
+    StringRef(transposePadding).split(tokens, ':');
+    for (StringRef token : tokens)
+      transposeVector.push_back(std::stoi(token.str()));
+    transposePaddingVectors.push_back(transposeVector);
   }
+  LinalgPaddingOptions paddingOptions;
+  paddingOptions.setPaddingValues(
+      SmallVector<std::string>{paddingValues.begin(), paddingValues.end()});
+  paddingOptions.setPackPaddings(
+      SmallVector<bool>{packPaddings.begin(), packPaddings.end()});
+  paddingOptions.setHoistPaddings(
+      SmallVector<int64_t>{hoistPaddings.begin(), hoistPaddings.end()});
+  paddingOptions.setTransposePaddings(transposePaddingVectors);
+
   vector::VectorContractLowering vectorContractLowering =
       llvm::StringSwitch<vector::VectorContractLowering>(
          vectorizeContractionTo.getValue())
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -7075,6 +7075,7 @@
        ":MathDialect",
        ":MemRefDialect",
        ":ModuleBufferization",
+        ":Parser",
        ":Pass",
        ":SCFDialect",
        ":SCFTransforms",
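
For illustration, a minimal sketch of how a client pass might populate the new list-based LinalgPaddingOptions in C++. The values mirror the flags exercised in the updated RUN lines above (padding-values=0.0,0.0,0.0 pack-paddings=1,1,0 hoist-paddings=3,3,0); the transpose permutations are made-up example values and not required by this patch.

  // Entries are indexed by operand number (matmul: lhs, rhs, output accumulator).
  // The padding values are strings handed to the attribute parser together with
  // the operand element type, exactly as padOperandToSmallestStaticBoundingBox
  // does above.
  LinalgPaddingOptions paddingOptions;
  paddingOptions.setPaddingValues({"0.0", "0.0", "0.0"});
  // Mark the two input PadOps nofold so they can be packed; leave the output
  // foldable.
  paddingOptions.setPackPaddings({true, true, false});
  // Hoist the input PadOps out of three enclosing loops; do not hoist the
  // output.
  paddingOptions.setHoistPaddings({3, 3, 0});
  // Example permutations for the packed PadOp results (illustrative only).
  paddingOptions.setTransposePaddings({{1, 0}, {0, 1}, {0, 1}});

Operands without an entry in paddingValues are simply left unpadded, which is how the INPUTS-ONLY RUN line (padding-values=0.0,0.0 for a three-operand matmul) replaces the removed pad-inputs-only flag.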