diff --git a/mlir/test/Transforms/loop-fusion-4.mlir b/mlir/test/Transforms/loop-fusion-4.mlir --- a/mlir/test/Transforms/loop-fusion-4.mlir +++ b/mlir/test/Transforms/loop-fusion-4.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion="mode=producer" -split-input-file | FileCheck %s --check-prefix=PRODUCER-CONSUMER // RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion="fusion-maximal mode=sibling" -split-input-file | FileCheck %s --check-prefix=SIBLING-MAXIMAL +// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion="fusion-maximal mode=producer" -split-input-file | FileCheck %s --check-prefix=PRODUCER-CONSUMER-MAXIMAL // Part I of fusion tests in mlir/test/Transforms/loop-fusion.mlir. // Part II of fusion tests in mlir/test/Transforms/loop-fusion-2.mlir @@ -141,3 +142,73 @@ // SIBLING-MAXIMAL-NEXT: affine.for %[[idx_1:.*]] = 0 to 64 { // SIBLING-MAXIMAL-NEXT: %[[result_1:.*]] = affine.for %[[idx_2:.*]] = 0 to 32 iter_args(%[[iter_0:.*]] = %[[cst_1]]) -> (f32) { // SIBLING-MAXIMAL-NEXT: %[[result_0:.*]] = affine.for %[[idx_3:.*]] = 0 to 64 iter_args(%[[iter_1:.*]] = %[[cst_0]]) -> (f32) { + +// ----- + +// Source loop nest %i1 is a reduction, but due to fusion of the preceding +// loop %i0 it generates a producer for %i3. Check if the +// producer fusion happens at depth 2 with original bounds.
+// PRODUCER-CONSUMER-MAXIMAL-LABEL: func @reduction_producer_consumer(
+func.func @reduction_producer_consumer(%arg0: memref<1024xf32, 1>, %arg1: memref<1xf32, 1>, %arg2: memref<1xf32, 1>, %arg3: memref<1xf32, 1>) {
+  %cst = arith.constant 0.000000e+00 : f32
+  %0 = memref.alloc() : memref<f32>
+  %1 = memref.alloc() : memref<f32>
+  %2 = memref.alloc() : memref<1024xf32, 1>
+  affine.for %i0 = 0 to 1024 {
+    %4 = affine.load %arg0[%i0] : memref<1024xf32, 1>
+    %6 = arith.addf %4, %4 : f32
+    affine.store %6, %2[%i0] : memref<1024xf32, 1>
+  }
+  affine.for %i1 = 0 to 1 {
+    affine.store %cst, %1[] : memref<f32>
+    affine.for %i2 = 0 to 1024 {
+      %5 = affine.load %1[] : memref<f32>
+      %6 = affine.load %2[%i2] : memref<1024xf32, 1>
+      %7 = arith.addf %5, %6 : f32
+      affine.store %7, %1[] : memref<f32>
+    }
+    %4 = affine.load %1[] : memref<f32>
+    affine.store %4, %arg2[%i1] : memref<1xf32, 1>
+  }
+  affine.for %i3 = 0 to 1 {
+    affine.store %cst, %0[] : memref<f32>
+    affine.for %i4 = 0 to 1024 {
+      %5 = affine.load %0[] : memref<f32>
+      %6 = affine.load %2[%i4] : memref<1024xf32, 1>
+      %7 = arith.addf %5, %6 : f32
+      affine.store %7, %0[] : memref<f32>
+    }
+    %4 = affine.load %0[] : memref<f32>
+    affine.store %4, %arg3[%i3] : memref<1xf32, 1>
+  }
+  return
+}
+// PRODUCER-CONSUMER-MAXIMAL:         %[[VAL_4:.*]] = memref.alloc() : memref<1xf32, 1>
+// PRODUCER-CONSUMER-MAXIMAL:         %[[VAL_5:.*]] = arith.constant 0 : index
+// PRODUCER-CONSUMER-MAXIMAL:         %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
+// PRODUCER-CONSUMER-MAXIMAL:         %[[VAL_7:.*]] = memref.alloc() : memref<f32>
+// PRODUCER-CONSUMER-MAXIMAL:         %[[VAL_8:.*]] = memref.alloc() : memref<f32>
+// PRODUCER-CONSUMER-MAXIMAL:         affine.for %[[VAL_9:.*]] = 0 to 1 {
+// PRODUCER-CONSUMER-MAXIMAL:           affine.store %[[VAL_6]], %[[VAL_7]][] : memref<f32>
+// PRODUCER-CONSUMER-MAXIMAL:           affine.for %[[VAL_10:.*]] = 0 to 1024 {
+// PRODUCER-CONSUMER-MAXIMAL:             affine.store %[[VAL_6]], %[[VAL_8]][] : memref<f32>
+// Incorrect bounds for the innermost loop.
+// PRODUCER-CONSUMER-MAXIMAL:             %[[VAL_11:.*]] = affine.load %{{.*}}{{\[}}%[[VAL_10]]] : memref<1024xf32, 1>
+// PRODUCER-CONSUMER-MAXIMAL:             %[[VAL_12:.*]] = arith.addf %[[VAL_11]], %[[VAL_11]] : f32
+// PRODUCER-CONSUMER-MAXIMAL:             affine.store %[[VAL_12]], %[[VAL_4]][0] : memref<1xf32, 1>
+// PRODUCER-CONSUMER-MAXIMAL:             %[[VAL_13:.*]] = affine.load %[[VAL_8]][] : memref<f32>
+// PRODUCER-CONSUMER-MAXIMAL:             %[[VAL_14:.*]] = affine.load %[[VAL_4]][0] : memref<1xf32, 1>
+// PRODUCER-CONSUMER-MAXIMAL:             %[[VAL_15:.*]] = arith.addf %[[VAL_13]], %[[VAL_14]] : f32
+// PRODUCER-CONSUMER-MAXIMAL:             affine.store %[[VAL_15]], %[[VAL_8]][] : memref<f32>
+// PRODUCER-CONSUMER-MAXIMAL:             %[[VAL_16:.*]] = affine.load %[[VAL_8]][] : memref<f32>
+// PRODUCER-CONSUMER-MAXIMAL:             affine.store %[[VAL_16]], %{{.*}}{{\[}}%[[VAL_5]]] : memref<1xf32, 1>
+// PRODUCER-CONSUMER-MAXIMAL:             %[[VAL_17:.*]] = affine.load %[[VAL_7]][] : memref<f32>
+// PRODUCER-CONSUMER-MAXIMAL:             %[[VAL_18:.*]] = affine.load %[[VAL_4]][0] : memref<1xf32, 1>
+// PRODUCER-CONSUMER-MAXIMAL:             %[[VAL_19:.*]] = arith.addf %[[VAL_17]], %[[VAL_18]] : f32
+// PRODUCER-CONSUMER-MAXIMAL:             affine.store %[[VAL_19]], %[[VAL_7]][] : memref<f32>
+// PRODUCER-CONSUMER-MAXIMAL:           }
+// PRODUCER-CONSUMER-MAXIMAL:           %[[VAL_20:.*]] = affine.load %[[VAL_7]][] : memref<f32>
+// PRODUCER-CONSUMER-MAXIMAL:           affine.store %[[VAL_20]], %{{.*}}{{\[}}%[[VAL_9]]] : memref<1xf32, 1>
+// PRODUCER-CONSUMER-MAXIMAL:         }
+// PRODUCER-CONSUMER-MAXIMAL:         return
+// PRODUCER-CONSUMER-MAXIMAL:       }