diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -182,9 +182,16 @@
   if (failed(paddingValue))
     return failure(hasDynamicShape);
 
-  // Cannot construct a static bounding box if the operand is not defined by an
-  // ExtractSliceOp.
-  auto sliceOp = opOperand->get().getDefiningOp<tensor::ExtractSliceOp>();
+  // Follow the use-def chain if `currOpOperand` is defined by a LinalgOp.
+  OpOperand *currOpOperand = opOperand;
+  while (auto linalgOp = currOpOperand->get().getDefiningOp<LinalgOp>()) {
+    OpResult result = currOpOperand->get().cast<OpResult>();
+    currOpOperand = linalgOp.getOutputOperand(result.getResultNumber());
+  }
+
+  // Cannot construct a static bounding box if the `currOpOperand` is not
+  // defined by an ExtractSliceOp.
+  auto sliceOp = currOpOperand->get().getDefiningOp<tensor::ExtractSliceOp>();
   if (!sliceOp)
     return failure(hasDynamicShape);
 
diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -1,10 +1,11 @@
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=MATMUL
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=FILL
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 pad-inputs-only run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=MATMUL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad pack-paddings=1,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad pack-paddings=1,0 run-enable-pass=false" -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,0 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL-MATMUL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 pad-inputs-only run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
 
 // MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 12, 7)>
 // MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
-#map = affine_map<()[s0] -> (7, -s0 + 12)>
+#map = affine_map<()[s0] -> (-s0 + 12, 7)>
 
 // MATMUL: static_sizes_output_divisible
 // MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>
@@ -13,10 +14,10 @@
 // MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
-func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
-                                    %arg1: tensor<12x25xf32>,
-                                    %arg2: tensor<24x25xf32>,
-                                    %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+func.func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
+                                         %arg1: tensor<12x25xf32>,
+                                         %arg2: tensor<24x25xf32>,
+                                         %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
   // MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
   // MATMUL: %[[TS2:.*]] = affine.min #[[MAP0]]()[%[[IV2]]]
@@ -43,24 +44,24 @@
   // MATMUL: %[[T6:.*]] = tensor.insert_slice %[[T5]]
   %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
-  return %5 : tensor<24x25xf32>
+  func.return %5 : tensor<24x25xf32>
 }
 
 // -----
 
 // MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 25, 7)>
 // MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
-#map = affine_map<()[s0] -> (7, -s0 + 25)>
+#map = affine_map<()[s0] -> (-s0 + 25, 7)>
 
 // MATMUL: static_sizes_input_divisible
 // MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>
 // MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
-func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
-                                   %arg1: tensor<12x25xf32>,
-                                   %arg2: tensor<24x25xf32>,
-                                   %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+func.func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
+                                        %arg1: tensor<12x25xf32>,
+                                        %arg2: tensor<24x25xf32>,
+                                        %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
   // MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
 
   %3 = tensor.extract_slice %arg0[%iv0, %iv2] [4, 6] [1, 1] : tensor<24x12xf32> to tensor<4x6xf32>
@@ -86,7 +87,7 @@
   %8 = tensor.insert_slice %7 into %arg2[%iv0, %iv1] [4, %4] [1, 1] : tensor<4x?xf32> into tensor<24x25xf32>
 
   // MATMUL: return %[[T4]]
-  return %8 : tensor<24x25xf32>
+  func.return %8 : tensor<24x25xf32>
 }
 
 // -----
 
@@ -97,9 +98,9 @@
 // MATMUL-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 5)>
 // MATMUL-DAG: #[[MAP4:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 6)>
 
-#map0 = affine_map<()[s0, s1] -> (5, -s0 + s1)>
-#map1 = affine_map<()[s0, s1] -> (6, -s0 + s1)>
-#map2 = affine_map<()[s0, s1] -> (7, -s0 + s1)>
+#map0 = affine_map<()[s0, s1] -> (-s0 + s1, 5)>
+#map1 = affine_map<()[s0, s1] -> (-s0 + s1, 6)>
+#map2 = affine_map<()[s0, s1] -> (-s0 + s1, 7)>
 
 // MATMUL: dynamic_sizes
 // MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x?xf32>
@@ -108,10 +109,10 @@
 // MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
 // MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
-func @dynamic_sizes(%arg0: tensor<?x?xf32>,
-                    %arg1: tensor<?x?xf32>,
-                    %arg2: tensor<?x?xf32>,
-                    %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<?x?xf32> {
+func.func @dynamic_sizes(%arg0: tensor<?x?xf32>,
+                         %arg1: tensor<?x?xf32>,
+                         %arg2: tensor<?x?xf32>,
+                         %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<?x?xf32> {
   // MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
   // MATMUL-DAG: %[[C1:.*]] = arith.constant 1
   %c1 = arith.constant 1 : index
@@ -156,29 +157,53 @@
   %13 = tensor.insert_slice %12 into %arg2[%iv0, %iv1] [%6, %9] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
 
   // MATMUL: return %[[T8]]
-  return %13 : tensor<?x?xf32>
+  func.return %13 : tensor<?x?xf32>
 }
 
 // -----
 
 #map0 = affine_map<()[s0] -> (64, s0)>
 
-// FILL: pad_multiple
-// FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
-func @pad_multiple(%arg0: tensor<64x64xf32>,
-                   %iv0 : index) -> tensor<?x?xf32> {
+// FILL-MATMUL: pad_multiple
+// FILL-MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
+func.func @pad_multiple(%arg0: tensor<64x64xf32>,
+                        %iv0 : index) -> tensor<?x?xf32> {
   %cst = arith.constant 0.0 : f32
   %size = affine.min #map0()[%iv0]
+
+  // FILL-MATMUL: %[[T0:.*]] = tensor.extract_slice
   %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
 
-  // Check both fill operations are padded by the same pad tensor operation.
-  // FILL: %[[T0:.*]] = tensor.pad
-  // FILL: %[[T1:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T0]]
-  // FILL: %[[T2:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T1]]
-  // FILL: = tensor.extract_slice %[[T2]]
+  // Check the two operations are padded by the same pad tensor operation.
+  // FILL-MATMUL: %[[T1:.*]] = tensor.pad %[[T0]]
+  // FILL-MATMUL: %[[T2:.*]] = linalg.fill {{.*}} outs(%[[T1]]
+  // FILL-MATMUL: %[[T3:.*]] = linalg.matmul {{.*}} outs(%[[T2]]
+  // FILL-MATMUL: = tensor.extract_slice %[[T3]]
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
+  %2 = linalg.matmul ins(%0, %0 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%1 : tensor<?x?xf32>) -> tensor<?x?xf32>
+  func.return %2 : tensor<?x?xf32>
+}
+
+// -----
+
+#map0 = affine_map<()[s0] -> (64, s0)>
+
+// MATMUL: pad_chain
+// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
+func.func @pad_chain(%arg0: tensor<64x64xf32>,
+                     %iv0 : index) -> tensor<?x?xf32> {
+  %cst = arith.constant 0.0 : f32
+  %size = affine.min #map0()[%iv0]
+  %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
+
+  // Check the matmul at the end of the use-def chain is padded.
+  // MATMUL: %[[T0:.*]] = linalg.fill
+  // MATMUL: %[[T1:.*]] = tensor.pad %[[T0]]
+  // MATMUL: %[[T2:.*]] = linalg.matmul {{.*}} outs(%[[T1]]
+  // MATMUL: = tensor.extract_slice %[[T2]]
   %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %2 : tensor<?x?xf32>
+  %2 = linalg.matmul ins(%0, %0 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%1 : tensor<?x?xf32>) -> tensor<?x?xf32>
+  func.return %2 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -187,8 +212,8 @@
 
 // MATMUL: compose_padding
 // MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
-func @compose_padding(%arg0: tensor<64x64xf32>,
-                      %iv0 : index) -> tensor<?x?xf32> {
+func.func @compose_padding(%arg0: tensor<64x64xf32>,
+                           %iv0 : index) -> tensor<?x?xf32> {
   %cst = arith.constant 0.0 : f32
 
   // MATMUL: %[[SIZE:.*]] = affine.min
@@ -220,7 +245,7 @@
   %5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
 
   // MATMUL: return %[[T5]]
-  return %5 : tensor<?x?xf32>
+  func.return %5 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -228,8 +253,8 @@
 #map0 = affine_map<()[s0] -> (64, s0)>
 
 // MATMUL: different_padding_values
-func @different_padding_values(%arg0: tensor<64x64xf32>,
-                               %iv0 : index) -> tensor<?x?xf32> {
+func.func @different_padding_values(%arg0: tensor<64x64xf32>,
+                                    %iv0 : index) -> tensor<?x?xf32> {
   %cst = arith.constant 42.0 : f32
   %size = affine.min #map0()[%iv0]
   %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
@@ -245,7 +270,7 @@
   // MATMUL: = tensor.pad
   // MATMUL: = linalg.matmul
   %5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %5 : tensor<?x?xf32>
+  func.return %5 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -253,8 +278,8 @@
 #map0 = affine_map<()[s0] -> (64, s0)>
 
 // MATMUL: different_padding_dynamic_sizes
-func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
-                                      %iv0 : index) -> tensor<?x?xf32> {
+func.func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
+                                           %iv0 : index) -> tensor<?x?xf32> {
  %cst = arith.constant 0.0 : f32
  %size = affine.min #map0()[%iv0]
  %0 = tensor.extract_slice %arg0[0, 0] [%iv0, %iv0] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
@@ -270,7 +295,7 @@
   // MATMUL: = tensor.pad
   // MATMUL: = linalg.matmul
   %5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %5 : tensor<?x?xf32>
+  func.return %5 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -278,8 +303,8 @@
 #map0 = affine_map<()[s0] -> (64, s0)>
 
 // MATMUL: different_padding_dynamic_rank
-func @different_padding_dynamic_rank(%arg0: tensor<64x64x1xf32>,
-                                     %iv0 : index) -> tensor<?x?xf32> {
+func.func @different_padding_dynamic_rank(%arg0: tensor<64x64x1xf32>,
+                                          %iv0 : index) -> tensor<?x?xf32> {
   %cst = arith.constant 0.0 : f32
   %size = affine.min #map0()[%iv0]
   %0 = tensor.extract_slice %arg0[0, 0, 0] [%size, %size, 1] [1, 1, 1] : tensor<64x64x1xf32> to tensor<?x?xf32>
@@ -295,7 +320,7 @@
   // MATMUL: = tensor.pad
   // MATMUL: = linalg.matmul
   %4 = linalg.matmul ins(%3, %3 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%3 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %4 : tensor<?x?xf32>
+  func.return %4 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -303,8 +328,8 @@
 #map0 = affine_map<()[s0] -> (64, s0)>
 
 // MATMUL: different_padding_static_sizes
-func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
-                                     %iv0 : index) -> tensor<?x?xf32> {
+func.func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
+                                          %iv0 : index) -> tensor<?x?xf32> {
   %cst = arith.constant 0.0 : f32
   %size = affine.min #map0()[%iv0]
   %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor<?x?xf32>
@@ -320,7 +345,7 @@
   // MATMUL: = tensor.pad
   // MATMUL: = linalg.matmul
   %5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %5 : tensor<?x?xf32>
+  func.return %5 : tensor<?x?xf32>
 }
 
 // -----
 
@@ -330,9 +355,9 @@
 
 // FILL: scalar_operand
 // FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: f32
 // FILL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<24x12xf32>
-func @scalar_operand(%arg0: f32,
-                     %arg1: tensor<24x12xf32>,
-                     %iv0 : index) -> tensor<24x12xf32> {
+func.func @scalar_operand(%arg0: f32,
+                          %arg1: tensor<24x12xf32>,
+                          %iv0 : index) -> tensor<24x12xf32> {
   %0 = affine.min #map0()[%iv0]
 
   // FILL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
@@ -343,7 +368,7 @@
   // FILL: %[[T6:.*]] = linalg.fill ins(%[[ARG0]]{{.*}}outs(%[[T1]]
   %2 = linalg.fill ins(%arg0 : f32) outs(%1 : tensor<4x?xf32>) -> tensor<4x?xf32>
   %3 = tensor.insert_slice %2 into %arg1[0, 0] [4, %0] [1, 1] : tensor<4x?xf32> into tensor<24x12xf32>
-  return %3 : tensor<24x12xf32>
+  func.return %3 : tensor<24x12xf32>
 }
 
 // -----
 
@@ -352,10 +377,10 @@
 
 // MATMUL: static_extract_slice_missing
 // MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<4x5xf32>,
-func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
-                                   %arg1: tensor<12x25xf32>,
-                                   %arg2: tensor<4x5xf32>,
-                                   %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<4x5xf32> {
+func.func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
+                                        %arg1: tensor<12x25xf32>,
+                                        %arg2: tensor<4x5xf32>,
+                                        %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<4x5xf32> {
   %0 = affine.min #map0()[%iv2]
   %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
   %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
@@ -366,7 +391,7 @@
   // MATMUL: = linalg.matmul ins(%[[T0]], %[[T1]]
   // MATMUL-SAME: outs(%[[ARG2]]
   %3 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%arg2 : tensor<4x5xf32>) -> tensor<4x5xf32>
-  return %3 : tensor<4x5xf32>
+  func.return %3 : tensor<4x5xf32>
 }
 
 // -----
 
@@ -377,10 +402,10 @@
 
 // MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<4x?xf32>,
 // MATMUL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>,
 // MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>,
-func @dynamic_extract_slice_missing(%arg0: tensor<4x?xf32>,
-                                    %arg1: tensor<12x25xf32>,
-                                    %arg2: tensor<24x25xf32>,
-                                    %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+func.func @dynamic_extract_slice_missing(%arg0: tensor<4x?xf32>,
+                                         %arg1: tensor<12x25xf32>,
+                                         %arg2: tensor<24x25xf32>,
+                                         %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
   %0 = affine.min #map0()[%iv2]
 
   // MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
@@ -393,7 +418,7 @@
   // MATMUL-SAME: outs(%[[T1]]
   %4 = linalg.matmul ins(%arg0, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
-  return %5 : tensor<24x25xf32>
+  func.return %5 : tensor<24x25xf32>
 }
 
 // -----
 
@@ -402,10 +427,10 @@
 
 // INPUTS-ONLY: static_input_padding_only
 // INPUTS-ONLY-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>,
-func @static_input_padding_only(%arg0: tensor<24x12xf32>,
-                                %arg1: tensor<12x25xf32>,
-                                %arg2: tensor<24x25xf32>,
-                                %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+func.func @static_input_padding_only(%arg0: tensor<24x12xf32>,
+                                     %arg1: tensor<12x25xf32>,
+                                     %arg2: tensor<24x25xf32>,
+                                     %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
   %0 = affine.min #map0()[%iv2]
   %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
   %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
@@ -420,7 +445,7 @@
   // INPUTS-ONLY-SAME: outs(%[[T0]]
   %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
-  return %5 : tensor<24x25xf32>
+  func.return %5 : tensor<24x25xf32>
 }
 
 // -----
 
@@ -431,10 +456,10 @@
 
 // INPUTS-ONLY-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>,
 // INPUTS-ONLY-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>,
 // INPUTS-ONLY-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>,
-func @dynamic_input_padding_only(%arg0: tensor<24x12xf32>,
-                                 %arg1: tensor<12x25xf32>,
-                                 %arg2: tensor<24x25xf32>,
-                                 %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+func.func @dynamic_input_padding_only(%arg0: tensor<24x12xf32>,
+                                      %arg1: tensor<12x25xf32>,
+                                      %arg2: tensor<24x25xf32>,
+                                      %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
   %0 = affine.min #map0()[%iv2]
 
   // INPUTS-ONLY: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
@@ -449,7 +474,7 @@
   // INPUTS-ONLY-SAME: outs(%[[T2]]
   %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x?xf32>) outs(%3 : tensor<4x?xf32>) -> tensor<4x?xf32>
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, %0] [1, 1] : tensor<4x?xf32> into tensor<24x25xf32>
-  return %5 : tensor<24x25xf32>
+  func.return %5 : tensor<24x25xf32>
 }
 
 // -----
 
@@ -458,8 +483,8 @@
 
 // FILL: rank_reducing
 // FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<1x64x1x64xf32>
-func @rank_reducing(%arg0: tensor<1x64x1x64xf32>,
-                    %iv0 : index) -> tensor<1x?x?xf32> {
+func.func @rank_reducing(%arg0: tensor<1x64x1x64xf32>,
+                         %iv0 : index) -> tensor<1x?x?xf32> {
   %cst = arith.constant 0.0 : f32
   %size = affine.min #map0()[%iv0]
   %0 = tensor.extract_slice %arg0[0, 0, 0, 0] [1, %size, 1, %size] [1, 1, 1, 1] : tensor<1x64x1x64xf32> to tensor<1x?x?xf32>
@@ -470,5 +495,5 @@
   // FILL-SAME: tensor<1x64x64xf32>
   // FILL: = tensor.extract_slice %[[T1]]
   %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
-  return %1 : tensor<1x?x?xf32>
+  func.return %1 : tensor<1x?x?xf32>
 }
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
@@ -184,6 +184,7 @@
     LinalgPaddingOptions paddingOptions,
     vector::VectorContractLowering vectorContractLowering,
     vector::VectorTransferSplit vectorTransferSplit) {
+  std::string anchorOpNameOrWildcard = fuse ? "" : anchorOpName.getValue();
   CodegenStrategy strategy;
   strategy
       .tileAndFuseIf(fuse && !tileSizes.empty(), anchorOpName,
@@ -199,11 +200,11 @@
               LinalgPromotionOptions()
                   .setAlignment(16)
                   .setUseFullTileBuffersByDefault(registerPromoteFullTile))
-      .padIf(pad, "", std::move(paddingOptions))
+      .padIf(pad, anchorOpNameOrWildcard, std::move(paddingOptions))
       .decomposeIf(decompose)
-      .generalizeIf(generalize, "")
+      .generalizeIf(generalize, anchorOpNameOrWildcard)
      .interchangeIf(!iteratorInterchange.empty(), iteratorInterchange)
-      .vectorizeIf(vectorize, "", nullptr, vectorizePadding)
+      .vectorizeIf(vectorize, anchorOpNameOrWildcard, nullptr, vectorizePadding)
       .vectorLowering(
           LinalgVectorLoweringOptions()
               .setVectorTransformsOptions(
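
Editor's note on the new use-def walk, with a minimal hand-written sketch (not FileCheck output from this patch; the pad bounds and the padded-input names %pad0/%pad1 are assumptions for illustration). Previously, the matmul in @pad_chain could not be padded because its output operand %1 is produced by a linalg.fill rather than directly by a tensor.extract_slice. The loop added in Transforms.cpp traces %1 through the fill's output operand back to the slice, so a static 64x64 bounding box can still be derived, roughly yielding:

// Sketch of the padded @pad_chain body (illustrative only):
//   %0    = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1]
//           : tensor<64x64xf32> to tensor<?x?xf32>
//   %1    = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
//   %pad  = tensor.pad %1 low[0, 0] high[%h0, %h1] { ... yield %cst ... }
//           : tensor<?x?xf32> to tensor<64x64xf32>
//   %mm   = linalg.matmul ins(%pad0, %pad1 : tensor<64x64xf32>, tensor<64x64xf32>)
//                         outs(%pad : tensor<64x64xf32>) -> tensor<64x64xf32>
//   %res  = tensor.extract_slice %mm[0, 0] [%size, %size] [1, 1]
//           : tensor<64x64xf32> to tensor<?x?xf32>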