diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -195,6 +195,10 @@
   SmallVector<Operation *, 8> opsToMove;
 
   for (auto &op : *loopBody) {
+    // Prevent hoisting of the users of affine loop results.
+    if (isa<AffineForOp>(op) && op.getNumResults()) {
+      definedOps.insert(&op);
+    }
     // We don't hoist for loops.
     if (!isa<AffineForOp>(op)) {
       if (!isa<AffineYieldOp>(op)) {
diff --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
--- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
+++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
@@ -613,3 +613,31 @@
 // CHECK-NEXT: addf
 // CHECK-NEXT: affine.vector_store
 // CHECK-NEXT: affine.for
+
+// -----
+
+// CHECK-LABEL: func @reduction_loop_no_invariant(
+func @reduction_loop_no_invariant(%arg0 : memref<30x512xf32, 1>,
+                                  %arg1 : memref<30xf32, 1>) {
+  %accum = memref.alloca() : memref<64xf32>
+  %zero = constant dense<0.0> : vector<64xf32>
+  affine.for %dim1 = 0 to 30 {
+    %vecAccum = affine.for %dim0 = 0 to 512 step 64 iter_args (%prevAccum = %zero) -> vector<64xf32> {
+      %arg0Vector = affine.vector_load %arg0[%dim1, %dim0] : memref<30x512xf32, 1>, vector<64xf32>
+      %newAccum = addf %prevAccum, %arg0Vector : vector<64xf32>
+      affine.yield %newAccum : vector<64xf32>
+    }
+    %scalarAccum = vector.reduction "add", %vecAccum : vector<64xf32> into f32
+    affine.store %scalarAccum, %arg1[%dim1] : memref<30xf32, 1>
+  }
+  return
+}
+
+// CHECK: affine.for
+// CHECK: %[[sum:.*]] = affine.for
+// CHECK: affine.vector_load
+// CHECK: addf
+// CHECK: affine.yield
+// CHECK: }
+// CHECK: vector.reduction "add", %[[sum]]
+// CHECK: affine.store
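
Note on the failure mode this patch guards against (an illustrative sketch, inferred
from the new test and the code comment above, not a verbatim reproducer): without
registering result-producing affine.for ops in definedOps, isOpLoopInvariant could
treat a user of a loop's results, such as the vector.reduction in the test, as
invariant with respect to the enclosing loop and hoist it, producing IR roughly like:

    // Hypothetical pre-patch output (invalid IR): the reduction has been
    // hoisted out of the outer loop, but %vecAccum is defined by the inner
    // affine.for inside that loop, so the definition no longer dominates
    // the hoisted use.
    %scalarAccum = vector.reduction "add", %vecAccum : vector<64xf32> into f32
    affine.for %dim1 = 0 to 30 {
      %vecAccum = affine.for %dim0 = 0 to 512 step 64
          iter_args (%prevAccum = %zero) -> vector<64xf32> {
        ...
      }
      affine.store %scalarAccum, %arg1[%dim1] : memref<30xf32, 1>
    }

With the fix, a for op that produces results is recorded in definedOps, so its users
are recognized as depending on a value defined inside the loop and are left in place,
which is what the CHECK lines of the new test verify.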