diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
@@ -65,10 +65,10 @@
           funcOp.getContext(), options, filter);
     }
     // Search the root operation using bottom up traversal.
-    GreedyRewriteConfig grc;
-    grc.useTopDownTraversal = false;
-    (void)applyPatternsAndFoldGreedily(funcOp,
-                                       std::move(tilingAndFusionPattern), grc);
+    GreedyRewriteConfig config;
+    config.useTopDownTraversal = false;
+    (void)applyPatternsAndFoldGreedily(
+        funcOp, std::move(tilingAndFusionPattern), config);
   }
 
   LinalgTilingAndFusionOptions options;
@@ -132,7 +132,18 @@
       paddingPattern.add<LinalgPaddingPattern>(funcOp.getContext(), options,
                                                filter);
     }
-    if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(paddingPattern))))
+    // Traverse the operations top down to pad producers before consumers. The
+    // extract slice operation introduced after every padded operation enables
+    // padding its consumers. Padding an operation whose producers have not been
+    // padded before fails due to the missing extract slice operations. In this
+    // case, the padding pattern increments the transformation marker without
+    // padding the operation. The top down traversal is thus not only a
+    // performance optimization but also needed to pad all operations along the
+    // use-def chains.
+    GreedyRewriteConfig config;
+    config.useTopDownTraversal = true;
+    if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(paddingPattern),
+                                            config)))
       signalPassFailure();
   }
 
diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -163,6 +163,28 @@
 
 #map0 = affine_map<()[s0] -> (64, s0)>
 
+// FILL: pad_multiple
+// FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
+func @pad_multiple(%arg0: tensor<64x64xf32>,
+                   %iv0 : index) -> tensor<?x?xf32> {
+  %cst = arith.constant 0.0 : f32
+  %size = affine.min #map0()[%iv0]
+  %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
+
+  // Check both fill operations are padded by the same pad tensor operation.
+  // FILL: %[[T0:.*]] = linalg.pad_tensor
+  // FILL: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]])
+  // FILL: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]])
+  // FILL: = tensor.extract_slice %[[T2]]
+  %1 = linalg.fill(%cst, %0) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+  %2 = linalg.fill(%cst, %1) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
+  return %2 : tensor<?x?xf32>
+}
+
+// -----
+
+#map0 = affine_map<()[s0] -> (64, s0)>
+
 // MATMUL: compose_padding
 // MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
 func @compose_padding(%arg0: tensor<64x64xf32>,