diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp
--- a/mlir/lib/Analysis/Utils.cpp
+++ b/mlir/lib/Analysis/Utils.cpp
@@ -207,7 +207,8 @@
 
-    // Check if src and dst loop bounds are the same. If not, we can guarantee
-    // that the slice is not maximal.
-    if (srcLbResult != dstLbResult || srcUbResult != dstUbResult)
+    // Check if src and dst loop bounds and steps are the same. If not, we can
+    // guarantee that the slice is not maximal.
+    if (srcLbResult != dstLbResult || srcUbResult != dstUbResult ||
+        srcLoop.getStep() != dstLoop.getStep())
       return false;
   }
 
diff --git a/mlir/test/Transforms/loop-fusion-4.mlir b/mlir/test/Transforms/loop-fusion-4.mlir
--- a/mlir/test/Transforms/loop-fusion-4.mlir
+++ b/mlir/test/Transforms/loop-fusion-4.mlir
@@ -109,3 +109,26 @@
 // SIBLING-MAXIMAL-NEXT: affine.for %[[idx_1:.*]] = 0 to 64 {
 // SIBLING-MAXIMAL-NEXT: %[[result_1:.*]] = affine.for %[[idx_2:.*]] = 0 to 32 iter_args(%[[iter_0:.*]] = %[[cst_1]]) -> (f32) {
 // SIBLING-MAXIMAL-NEXT: %[[result_0:.*]] = affine.for %[[idx_3:.*]] = 0 to 64 iter_args(%[[iter_1:.*]] = %[[cst_0]]) -> (f32) {
+
+// -----
+
+// Expects fusion of producer into consumer at depth 1 and source loop to not
+// be removed due to difference in loop steps.
+// PRODUCER-CONSUMER-LABEL: func @check_src_dst_step
+func @check_src_dst_step(%m : memref<100xf32>,
+                         %src: memref<100xf32>,
+                         %out: memref<100xf32>) {
+  affine.for %i0 = 0 to 100 {
+    %r1 = affine.load %src[%i0]: memref<100xf32>
+    affine.store %r1, %m[%i0] : memref<100xf32>
+  }
+  affine.for %i2 = 0 to 100 step 2 {
+    %r2 = affine.load %m[%i2] : memref<100xf32>
+    affine.store %r2, %out[%i2] : memref<100xf32>
+  }
+  return
+}
+
+// PRODUCER-CONSUMER: affine.for %{{.*}} = 0 to 100 {
+// PRODUCER-CONSUMER: affine.for %{{.*}} = 0 to 100 step 2 {
+// PRODUCER-CONSUMER: return