diff --git a/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp b/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp --- a/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp @@ -104,11 +104,11 @@ llvm::Optional singleStep = candidateLoop.getSingleStep(); if (!inductionVar || !lowerBound || !singleStep) return failure(); + + if (!dom.dominates(allocOp.getOperation(), candidateLoop)) + return failure(); + OpBuilder builder(candidateLoop); - Value stepValue = - getOrCreateValue(*singleStep, builder, candidateLoop->getLoc()); - Value lowerBoundValue = - getOrCreateValue(*lowerBound, builder, candidateLoop->getLoc()); SmallVector newShape(1, multiplier); ArrayRef oldShape = allocOp.getType().getShape(); newShape.append(oldShape.begin(), oldShape.end()); @@ -117,15 +117,28 @@ allocOp.getType().getMemorySpace()); builder.setInsertionPoint(allocOp); Location loc = allocOp->getLoc(); - auto newAlloc = builder.create(loc, newMemref); + auto newAlloc = builder.create(loc, newMemref, ValueRange{}, + allocOp->getAttrs()); builder.setInsertionPoint(&candidateLoop.getLoopBody().front(), candidateLoop.getLoopBody().front().begin()); + + SmallVector operands = {*inductionVar}; AffineExpr induc = getAffineDimExpr(0, allocOp.getContext()); - AffineExpr init = getAffineDimExpr(1, allocOp.getContext()); - AffineExpr step = getAffineDimExpr(2, allocOp.getContext()); + unsigned dimCount = 1; + auto getAffineExpr = [&](OpFoldResult e) -> AffineExpr { + if (Optional constValue = getConstantIntValue(e)) { + return getAffineConstantExpr(*constValue, allocOp.getContext()); + } else { + auto value = getOrCreateValue(e, builder, candidateLoop->getLoc()); + operands.push_back(value); + return getAffineDimExpr(dimCount++, allocOp.getContext()); + } + }; + auto init = getAffineExpr(*lowerBound); + auto step = getAffineExpr(*singleStep); + AffineExpr expr = ((induc - init).floorDiv(step)) % multiplier; - auto map = AffineMap::get(3, 0, expr); - std::array operands = {*inductionVar, lowerBoundValue, stepValue}; + auto map = AffineMap::get(dimCount, 0, expr); Value bufferIndex = builder.create(loc, map, operands); SmallVector offsets, sizes, strides; offsets.push_back(bufferIndex); diff --git a/mlir/test/Dialect/MemRef/multibuffer.mlir b/mlir/test/Dialect/MemRef/multibuffer.mlir --- a/mlir/test/Dialect/MemRef/multibuffer.mlir +++ b/mlir/test/Dialect/MemRef/multibuffer.mlir @@ -1,19 +1,19 @@ // RUN: mlir-opt %s -allow-unregistered-dialect -test-multi-buffering=multiplier=5 -cse -split-input-file | FileCheck %s -// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (((d0 - d1) floordiv d2) mod 5)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0) -> (((d0 - 1) floordiv 3) mod 5)> // CHECK-LABEL: func @multi_buffer func.func @multi_buffer(%a: memref<1024x1024xf32>) { -// CHECK-DAG: %[[A:.*]] = memref.alloc() : memref<5x4x128xf32> +// CHECK-DAG: %[[A:.*]] = memref.alloc() {someAttribute} : memref<5x4x128xf32> // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index - %0 = memref.alloc() : memref<4x128xf32> + %0 = memref.alloc() {someAttribute} : memref<4x128xf32> %c1024 = arith.constant 1024 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index // CHECK: scf.for %[[IV:.*]] = %[[C1]] scf.for %arg2 = %c1 to %c1024 step %c3 { -// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]], %[[C1]], %[[C3]]) +// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]]) // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0, 0] [1, 4, 128] [1, 1, 1] : memref<5x4x128xf32> to memref<4x128xf32, strided<[128, 1], offset: ?>> %1 = memref.subview %a[%arg2, 0] [4, 128] [1, 1] : memref<1024x1024xf32> to memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> @@ -32,15 +32,13 @@ // CHECK-LABEL: func @multi_buffer_affine func.func @multi_buffer_affine(%a: memref<1024x1024xf32>) { // CHECK-DAG: %[[A:.*]] = memref.alloc() : memref<5x4x128xf32> -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index %0 = memref.alloc() : memref<4x128xf32> %c1024 = arith.constant 1024 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index // CHECK: affine.for %[[IV:.*]] = 1 affine.for %arg2 = 1 to 1024 step 3 { -// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]], %[[C1]], %[[C3]]) +// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]]) // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0, 0] [1, 4, 128] [1, 1, 1] : memref<5x4x128xf32> to memref<4x128xf32, strided<[128, 1], offset: ?>> %1 = memref.subview %a[%arg2, 0] [4, 128] [1, 1] : memref<1024x1024xf32> to memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> @@ -56,7 +54,7 @@ // ----- -// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (((d0 - d1) floordiv d2) mod 5)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0) -> (((d0 - 1) floordiv 3) mod 5)> // CHECK-LABEL: func @multi_buffer_subview_use func.func @multi_buffer_subview_use(%a: memref<1024x1024xf32>) { @@ -69,7 +67,7 @@ %c3 = arith.constant 3 : index // CHECK: scf.for %[[IV:.*]] = %[[C1]] scf.for %arg2 = %c1 to %c1024 step %c3 { -// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]], %[[C1]], %[[C3]]) +// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]]) // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0, 0] [1, 4, 128] [1, 1, 1] : memref<5x4x128xf32> to memref<4x128xf32, strided<[128, 1], offset: ?>> %1 = memref.subview %a[%arg2, 0] [4, 128] [1, 1] : memref<1024x1024xf32> to memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> diff --git a/mlir/test/Dialect/MemRef/transform-ops.mlir b/mlir/test/Dialect/MemRef/transform-ops.mlir --- a/mlir/test/Dialect/MemRef/transform-ops.mlir +++ b/mlir/test/Dialect/MemRef/transform-ops.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s -test-transform-dialect-interpreter -verify-diagnostics -allow-unregistered-dialect | FileCheck %s -// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2) -> (((d0 - d1) floordiv d2) mod 2)> +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)> // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> // CHECK-LABEL: func @multi_buffer @@ -17,7 +17,7 @@ // CHECK: scf.for %[[IV:.*]] = %[[C0]] scf.for %i0 = %c0 to %c16 step %c4 { - // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]], %[[C0]], %[[C4]]) + // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]]) // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>> %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> // CHECK: memref.copy %{{.*}}, %[[SV]] : memref<4xf32, #[[$MAP1]]> to memref<4xf32, strided<[1], offset: ?>>