diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -308,10 +308,11 @@ // This may evolve in the future. unsigned nWin = nPar - getNumBatchDimensions() - getNumInputFeatureDimensions(); - SmallVector iters(nPar, getParallelIteratorTypeName()); + SmallVector iters(nPar - getNumOutputFeatureDimensions(), getParallelIteratorTypeName()); iters.reserve(nPar + nRed + nWin); - iters.append(nRed, getReductionIteratorTypeName()); iters.append(nWin, getWindowIteratorTypeName()); + iters.append(nRed, getReductionIteratorTypeName()); + iters.append(getNumOutputFeatureDimensions(), getParallelIteratorTypeName()); return Builder(getContext()).getStrArrayAttr(iters); } @@ -336,13 +337,13 @@ // * output filter dimensions (ks with #ks = 1 for now) auto bs = makeAffineDimExprs(getNumBatchDimensions(), idx, context); auto xs = makeAffineDimExprs(nWin, idx, context); - auto ks = makeAffineDimExprs( - getNumOutputFeatureDimensions(), idx, context); + // Window reduction dims: sum_{z[0], ..., z[N-1], q} + auto zs = makeAffineDimExprs(nWin, idx, context); // Non-window reduction dim: sum_{z[0], ..., z[N-1], q} auto qs = makeAffineDimExprs( getNumInputFeatureDimensions(), idx, context); - // Window reduction dims: sum_{z[0], ..., z[N-1], q} - auto zs = makeAffineDimExprs(nWin, idx, context); + auto ks = makeAffineDimExprs( + getNumOutputFeatureDimensions(), idx, context); // Construct the weighedSum expression. auto ws = weightedPoolingInputIndex(*this, xs, zs); return Builder(getContext()).getAffineMapArrayAttr({ diff --git a/mlir/test/Dialect/Linalg/affine.mlir b/mlir/test/Dialect/Linalg/affine.mlir --- a/mlir/test/Dialect/Linalg/affine.mlir +++ b/mlir/test/Dialect/Linalg/affine.mlir @@ -50,9 +50,9 @@ // CHECK: %[[X0:.*]] = dim %arg2, %c1 : memref // CHECK: affine.for %{{.*}} = 0 to %[[B]] { // CHECK: affine.for %{{.*}} = 0 to %[[X0]] { -// CHECK: affine.for %{{.*}} = 0 to %[[K]] { +// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] { // CHECK: affine.for %{{.*}} = 0 to %[[Q]] { -// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] { +// CHECK: affine.for %{{.*}} = 0 to %[[K]] { // CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) // No padding needed here; only affine loads. // CHECK-NEXT: affine.load @@ -80,10 +80,10 @@ // CHECK: affine.for %{{.*}} = 0 to %[[B]] { // CHECK: affine.for %{{.*}} = 0 to %[[X0]] { // CHECK: affine.for %{{.*}} = 0 to %[[X1]] { -// CHECK: affine.for %{{.*}} = 0 to %[[K]] { -// CHECK: affine.for %{{.*}} = 0 to %[[Q]] { -// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] { -// CHECK: affine.for %{{.*}} = 0 to %[[Z1]] { +// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] { +// CHECK: affine.for %{{.*}} = 0 to %[[Z1]] { +// CHECK: affine.for %{{.*}} = 0 to %[[Q]] { +// CHECK: affine.for %{{.*}} = 0 to %[[K]] { // CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) // CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) // CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -282,9 +282,9 @@ // CHECKLOOP: %[[X0:.*]] = dim %arg2, %c1 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKLOOP: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) // CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref // CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref @@ -300,16 +300,17 @@ // CHECKPARALLEL: %[[K:.*]] = dim %arg0, %c2 : memref // CHECKPARALLEL: %[[B:.*]] = dim %arg1, %c0 : memref // CHECKPARALLEL: %[[X0:.*]] = dim %arg2, %c1 : memref -// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[B]], %[[X0]]) step (%{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKPARALLEL: scf.parallel ({{.*}}) = (%{{.*}}) to (%[[K]]) step (%{{.*}}) { +// CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref func @conv_view4(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv(%arg0, %arg1, %arg2) {dilations = [4, 5], strides = [2, 3]} : memref, memref, memref @@ -327,10 +328,10 @@ // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKLOOP: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}}) // CHECKLOOP: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]](%{{.*}}, %{{.*}}) // CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref @@ -349,18 +350,19 @@ // CHECKPARALLEL: %[[B:.*]] = dim %arg1, %c0 : memref // CHECKPARALLEL: %[[X0:.*]] = dim %arg2, %c1 : memref // CHECKPARALLEL: %[[X1:.*]] = dim %arg2, %c2 : memref -// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]]) step (%{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[K]]) step (%{{.*}}) { +// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]](%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref func @conv_padding(%arg0: memref, %arg1: memref, @@ -384,10 +386,10 @@ // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKLOOP: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) // CHECKLOOP: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) // CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) @@ -410,21 +412,22 @@ // CHECKPARALLEL: %[[B:.*]] = dim %arg1, %c0 : memref // CHECKPARALLEL: %[[X0:.*]] = dim %arg2, %c1 : memref // CHECKPARALLEL: %[[X1:.*]] = dim %arg2, %c2 : memref -// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) -// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]]) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]]) step (%{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[K]]) step (%{{.*}}) { +// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) +// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]]) +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref func @pooling_max(%arg0: memref, %arg1: memref, diff --git a/mlir/test/Dialect/Linalg/tile_conv.mlir b/mlir/test/Dialect/Linalg/tile_conv.mlir --- a/mlir/test/Dialect/Linalg/tile_conv.mlir +++ b/mlir/test/Dialect/Linalg/tile_conv.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,0,4" | FileCheck %s -check-prefix=TILE-23004 // TILE-23004-DAG: #[[$D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)> // TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s1)>