SCF loop pipelining: scale the prologue induction variable by the loop step.

While emitting the prologue, the induction-variable constant created for
prologue iteration i is now `lb + i * step` instead of `lb + i`. The added
test `simple_pipeline_step` pipelines a loop from 0 to 11 with step 3 and
expects prologue loads at indices 0 and 3, a kernel loop from 0 to 5 with
step 3, and epilogue stores at indices 6 and 9.

NOTE(review): this patch text was recovered from a copy whose line breaks
were collapsed and whose '<...>' contents were dropped. The template
arguments (scf::YieldOp, arith::ConstantIndexOp), the memref<?xf32> types and
the column alignment below are reconstructed -- confirm against the upstream
commit before applying.

diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
--- a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
@@ -138,7 +138,8 @@
   auto yield = cast<scf::YieldOp>(forOp.getBody()->getTerminator());
   for (int64_t i = 0; i < maxStage; i++) {
     // special handling for induction variable as the increment is implicit.
-    Value iv = rewriter.create<arith::ConstantIndexOp>(forOp.getLoc(), lb + i);
+    Value iv =
+        rewriter.create<arith::ConstantIndexOp>(forOp.getLoc(), lb + i * step);
     setValueMapping(forOp.getInductionVar(), iv, i);
     for (Operation *op : opOrder) {
       if (stages[op] > i)
diff --git a/mlir/test/Dialect/SCF/loop-pipelining.mlir b/mlir/test/Dialect/SCF/loop-pipelining.mlir
--- a/mlir/test/Dialect/SCF/loop-pipelining.mlir
+++ b/mlir/test/Dialect/SCF/loop-pipelining.mlir
@@ -34,6 +34,45 @@
 
 // -----
 
+// CHECK-LABEL: simple_pipeline_step(
+//  CHECK-SAME:   %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
+//   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
+//   CHECK-DAG:   %[[C3:.*]] = arith.constant 3 : index
+//   CHECK-DAG:   %[[C5:.*]] = arith.constant 5 : index
+//   CHECK-DAG:   %[[C6:.*]] = arith.constant 6 : index
+//   CHECK-DAG:   %[[C9:.*]] = arith.constant 9 : index
+// Prologue:
+//       CHECK:   %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
+//       CHECK:   %[[L1:.*]] = memref.load %[[A]][%[[C3]]] : memref<?xf32>
+// Kernel:
+//  CHECK-NEXT:   %[[L2:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C5]]
+//  CHECK-SAME:     step %[[C3]] iter_args(%[[LARG0:.*]] = %[[L0]], %[[LARG1:.*]] = %[[L1]]) -> (f32, f32) {
+//  CHECK-NEXT:     %[[ADD0:.*]] = arith.addf %[[LARG0]], %{{.*}} : f32
+//  CHECK-NEXT:     memref.store %[[ADD0]], %[[R]][%[[IV]]] : memref<?xf32>
+//  CHECK-NEXT:     %[[IV1:.*]] = arith.addi %[[IV]], %[[C6]] : index
+//  CHECK-NEXT:     %[[LR:.*]] = memref.load %[[A]][%[[IV1]]] : memref<?xf32>
+//  CHECK-NEXT:     scf.yield %[[LARG1]], %[[LR]] : f32, f32
+//  CHECK-NEXT:   }
+// Epilogue:
+//  CHECK-NEXT:   %[[ADD1:.*]] = arith.addf %[[L2]]#0, %{{.*}} : f32
+//  CHECK-NEXT:   memref.store %[[ADD1]], %[[R]][%[[C6]]] : memref<?xf32>
+//  CHECK-NEXT:   %[[ADD2:.*]] = arith.addf %[[L2]]#1, %{{.*}} : f32
+//  CHECK-NEXT:   memref.store %[[ADD2]], %[[R]][%[[C9]]] : memref<?xf32>
+func @simple_pipeline_step(%A: memref<?xf32>, %result: memref<?xf32>) {
+  %c0 = arith.constant 0 : index
+  %c3 = arith.constant 3 : index
+  %c11 = arith.constant 11 : index
+  %cf = arith.constant 1.0 : f32
+  scf.for %i0 = %c0 to %c11 step %c3 {
+    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
+    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : f32
+    memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 1 } : memref<?xf32>
+  } { __test_pipelining_loop__ }
+  return
+}
+
+// -----
+
 // CHECK-LABEL: three_stage(
 //  CHECK-SAME:   %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
 //   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index