diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
@@ -0,0 +1,35 @@
+// RUN: mlir-opt %s -linalg-bufferize -std-bufferize -func-bufferize \
+// RUN:   -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+func @main() {
+  %const = constant dense<10.0> : tensor<2xf32>
+  %insert_val = constant dense<20.0> : tensor<1xf32>
+
+  // Both of these subtensor_insert ops insert into the same original tensor
+  // value `%const`. This can easily cause bugs if at the memref level
+  // we attempt to write in-place into the memref that %const has been
+  // converted into.
+  %inserted_at_position_0 = subtensor_insert %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
+  %inserted_at_position_1 = subtensor_insert %insert_val into %const[1][1][1] : tensor<1xf32> into tensor<2xf32>
+
+  %unranked_at_position_0 = tensor_cast %inserted_at_position_0 : tensor<2xf32> to tensor<*xf32>
+  call @print_memref_f32(%unranked_at_position_0) : (tensor<*xf32>) -> ()
+
+  // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}}
+  // CHECK-SAME: rank = 1 offset = 0 sizes = [2] strides = [1] data =
+  // CHECK-NEXT: [20, 10]
+
+  %unranked_at_position_1 = tensor_cast %inserted_at_position_1 : tensor<2xf32> to tensor<*xf32>
+  call @print_memref_f32(%unranked_at_position_1) : (tensor<*xf32>) -> ()
+
+  // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}}
+  // CHECK-SAME: rank = 1 offset = 0 sizes = [2] strides = [1] data =
+  // CHECK-NEXT: [10, 20]
+
+  return
+}
+
+func @print_memref_f32(%ptr : tensor<*xf32>)
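To make the failure mode the new test guards against concrete, here is a hand-written sketch (not compiler output; layout maps elided and value names illustrative) of the incorrect memref-level IR that an in-place lowering of the two subtensor_insert ops would produce:

  %buf = tensor_to_memref %const : memref<2xf32>
  %ins = tensor_to_memref %insert_val : memref<1xf32>
  // The first insert writes into the buffer backing %const...
  %view0 = subview %buf[0] [1] [1] : memref<2xf32> to memref<1xf32>
  linalg.copy(%ins, %view0) : memref<1xf32>, memref<1xf32>
  // ...and the second insert clobbers the same buffer.
  %view1 = subview %buf[1] [1] [1] : memref<2xf32> to memref<1xf32>
  linalg.copy(%ins, %view1) : memref<1xf32>, memref<1xf32>

Both results would then alias a single allocation holding [20, 20], so the two prints could never observe [20, 10] and [10, 20] as the CHECK lines require; worse, %buf may back a constant, so the stores could mutate constant memory.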
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
@@ -40,6 +40,19 @@
   return b.create<IndexCastOp>(loc, val, b.getIndexType());
 }
 
+static Value cloneMemref(Location loc, Value memref, OpBuilder &b) {
+  auto memrefType = memref.getType().cast<MemRefType>();
+  SmallVector<Value, 4> dynOperands;
+  for (auto dim : llvm::enumerate(memrefType.getShape())) {
+    if (dim.value() == TensorType::kDynamicSize) {
+      dynOperands.push_back(b.create<DimOp>(loc, memref, dim.index()));
+    }
+  }
+  auto alloc = b.create<AllocOp>(loc, memrefType, dynOperands);
+  b.create<linalg::CopyOp>(loc, memref, alloc);
+  return alloc;
+}
+
 static LogicalResult
 allocateBuffersForResults(Location loc, LinalgOp linalgOp,
                           linalg::GenericOpAdaptor &adaptor,
@@ -65,19 +78,10 @@
     // results.
     // TODO: update this assumption because the reality is more complex
    // under linalg on tensor based transformations.
-    bool foldedInitTensor = resultIndex < linalgOp.getNumInitTensors();
-    if (foldedInitTensor) {
-      Value initTensor = linalgOp.getInitTensor(resultIndex);
-      Value initBuffer = adaptor.init_tensors()[resultIndex];
-      SmallVector<Value, 4> dynOperands;
-      for (auto dim : llvm::enumerate(tensorShape)) {
-        if (dim.value() == TensorType::kDynamicSize) {
-          dynOperands.push_back(b.create<DimOp>(loc, initTensor, dim.index()));
-        }
-      }
-      auto alloc = b.create<AllocOp>(loc, memrefType, dynOperands);
-      b.create<linalg::CopyOp>(loc, initBuffer, alloc);
-      resultBuffers.push_back(alloc);
+    bool hasInitTensor = resultIndex < linalgOp.getNumInitTensors();
+    if (hasInitTensor) {
+      resultBuffers.push_back(
+          cloneMemref(loc, adaptor.init_tensors()[resultIndex], b));
       continue;
     }
 
@@ -302,7 +306,10 @@
     Value sourceMemRef = adaptor.source();
     assert(sourceMemRef.getType().isa<MemRefType>());
 
-    Value destMemRef = adaptor.dest();
+    // For now, be conservative and copy the converted input memref.
+    // In general, the converted input memref here could be aliased or could
+    // point into constant memory, so mutating it would lead to miscompilations.
+    Value destMemRef = cloneMemref(op.getLoc(), adaptor.dest(), rewriter);
     assert(destMemRef.getType().isa<MemRefType>());
 
     // Take a subview to copy the small memref.
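For a dynamically shaped destination, cloneMemref materializes one dim op per dynamic dimension, an alloc sized by those dims, and a linalg.copy from the original buffer into the fresh one; the updated CHECK lines in bufferize.mlir below spell out exactly this sequence. Schematically, cloning a memref<?x?xf32> value %m yields (names illustrative):

  %c0 = constant 0 : index
  %d0 = dim %m, %c0 : memref<?x?xf32>
  %c1 = constant 1 : index
  %d1 = dim %m, %c1 : memref<?x?xf32>
  %m_copy = alloc(%d0, %d1) : memref<?x?xf32>
  linalg.copy(%m, %m_copy) : memref<?x?xf32>, memref<?x?xf32>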
diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir
--- a/mlir/test/Dialect/Linalg/bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/bufferize.mlir
@@ -199,20 +199,33 @@
   // CHECK: %[[IDX:.*]] = call @make_index() : () -> index
   %i0 = call @make_index() : () -> index
 
+  // CHECK-DAG: %[[M0:.*]] = tensor_to_memref %[[T]] : memref<?x?xf32>
   // CHECK-DAG: %[[SM0:.*]] = tensor_to_memref %[[ST0]] : memref<2x3xf32>
-  // CHECK-NEXT: %[[SUBVIEW0:.*]] = subview %[[M0]][0, 0] [2, 3] [1, 1]
+  // CHECK-NEXT: %[[C0:.*]] = constant 0 : index
+  // CHECK-NEXT: %[[DIM0:.*]] = dim %[[M0]], %[[C0]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[C1:.*]] = constant 1 : index
+  // CHECK-NEXT: %[[DIM1:.*]] = dim %[[M0]], %[[C1]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[M0_COPY:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
+  // CHECK-NEXT: linalg.copy(%[[M0]], %[[M0_COPY]]) : memref<?x?xf32>, memref<?x?xf32>
+  // CHECK-NEXT: %[[SUBVIEW0:.*]] = subview %[[M0_COPY]][0, 0] [2, 3] [1, 1]
   // CHECK-SAME:   memref<?x?xf32> to memref<2x3xf32, #[[$MAP0]]>
   // CHECK-NEXT: linalg.copy(%[[SM0]], %[[SUBVIEW0]]) : memref<2x3xf32>, memref<2x3xf32, #[[$MAP0]]>
-  // CHECK-NEXT: %[[RT0:.*]] = tensor_load %[[M0]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[RT0:.*]] = tensor_load %[[M0_COPY]] : memref<?x?xf32>
   %t0 = subtensor_insert %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor<?x?xf32>
 
+  // CHECK-DAG: %[[M1:.*]] = tensor_to_memref %[[T]] : memref<?x?xf32>
   // CHECK-DAG: %[[SM1:.*]] = tensor_to_memref %[[ST1]] : memref<2x?xf32>
-  // CHECK-NEXT: %[[SUBVIEW1:.*]] = subview %[[M1]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
+  // CHECK-NEXT: %[[C0:.*]] = constant 0 : index
+  // CHECK-NEXT: %[[DIM0:.*]] = dim %[[M1]], %[[C0]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[C1:.*]] = constant 1 : index
+  // CHECK-NEXT: %[[DIM1:.*]] = dim %[[M1]], %[[C1]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[M1_COPY:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
+  // CHECK-NEXT: linalg.copy(%[[M1]], %[[M1_COPY]]) : memref<?x?xf32>, memref<?x?xf32>
+  // CHECK-NEXT: %[[SUBVIEW1:.*]] = subview %[[M1_COPY]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
   // CHECK-SAME:   memref<?x?xf32> to memref<2x?xf32, #[[$MAP1]]>
   // CHECK-NEXT: linalg.copy(%[[SM1]], %[[SUBVIEW1]]) : memref<2x?xf32>, memref<2x?xf32, #[[$MAP1]]>
-  // CHECK-NEXT: %[[RT1:.*]] = tensor_load %[[M1]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[RT1:.*]] = tensor_load %[[M1_COPY]] : memref<?x?xf32>
   %t1 = subtensor_insert %st1 into %t[0, %i0][2, %i0][1, 2]
       : tensor<2x?xf32> into tensor<?x?xf32>
   // CHECK: return %[[RT0]], %[[RT1]]
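A closing note on the design choice flagged by the "For now, be conservative" comment: the unconditional clone is safe but sometimes redundant. In a case like the following hypothetical tensor-level IR, where %t has no other users, writing in place into the buffer backing %t would have been legal, and the new alloc+copy pair is pure overhead:

  %only_use = subtensor_insert %st into %t[0, 0][2, 3][1, 1]
      : tensor<2x3xf32> into tensor<?x?xf32>

Distinguishing that case requires use/aliasing analysis at bufferization time, so this patch deliberately stays conservative; eliding the redundant copy could presumably be left to a later copy-removal optimization, which is outside the scope of this change.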