mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
[... 162 lines not shown ...]
func.func @matmul(
  %c32 = arith.constant 32 : index
  %cst = arith.constant 0.000000e+00 : f32
  %c128 = arith.constant 128 : index
  %c192 = arith.constant 192 : index
  %c8 = arith.constant 8 : index
  %c16 = arith.constant 16 : index
  // Hoisted alloc.
-  // CHECK: %[[ALLOC:.*]] = memref.alloc() {alignment = 128 : i64} : memref<8x16xf32>
+  // CHECK: %[[ALLOC:.*]] = memref.alloc() {alignment = 128 : i64} : memref<128x192xf32>
+  // CHECK: memref.copy %[[C]], %[[ALLOC]]
  // CHECK: scf.for %[[I:.*]] =
  %0 = scf.for %arg3 = %c0 to %c128 step %c8 iter_args(%arg4 = %C) -> (tensor<128x192xf32>) {
    %1 = tensor.extract_slice %A[%arg3, 0] [8, 256] [1, 1] :
        tensor<128x256xf32> to tensor<8x256xf32>
    // CHECK: scf.for %[[J:.*]] =
    %2 = scf.for %arg5 = %c0 to %c192 step %c16 iter_args(%arg6 = %arg4) -> (tensor<128x192xf32>) {
      %3 = tensor.extract_slice %B[0, %arg5] [256, 16] [1, 1] :
          tensor<256x192xf32> to tensor<256x16xf32>
-      // %4 does not match an insert_slice, it cannot be bufferized inplace and needs to alloc.
+      // C was already replaced with a copy by preprocessing, so no copy is
+      // needed here.
+      // CHECK: %[[C_SLICE:.*]] = memref.subview %[[ALLOC]]
      %4 = tensor.extract_slice %C[%arg3, %arg5] [8, 16] [1, 1] :
          tensor<128x192xf32> to tensor<8x16xf32>
      // linalg.fill is inplace.
-      // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[ALLOC]] : memref<8x16xf32>)
+      // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[C_SLICE]]
      %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<8x16xf32>) -> tensor<8x16xf32>
      // CHECK: scf.for %[[K:.*]] =
      %6 = scf.for %arg7 = %c0 to %c256 step %c32 iter_args(%arg8 = %5) -> (tensor<8x16xf32>) {
        %8 = tensor.extract_slice %1[0, %arg7] [8, 32] [1, 1] :
            tensor<8x256xf32> to tensor<8x32xf32>
        %9 = tensor.extract_slice %3[%arg7, 0] [32, 16] [1, 1] :
            tensor<256x16xf32> to tensor<32x16xf32>
        // linalg.matmul is inplace as well as the enclosing scf.for.
-        // CHECK: linalg.matmul ins({{.*}} outs(%[[ALLOC]]
+        // CHECK: linalg.matmul ins({{.*}} outs(%[[C_SLICE]]
        %10 = linalg.matmul ins(%8, %9 : tensor<8x32xf32>, tensor<32x16xf32>)
                            outs(%arg8 : tensor<8x16xf32>)
          -> tensor<8x16xf32>
        scf.yield %10 : tensor<8x16xf32>
      }
      // insert_slice is inplace but its source comes from an equivalent buffer
      // that is not in place. So we must insert a copy of the small buffer into
      // the bigger buffer.
      // CHECK: %[[T:.*]] = memref.subview %[[C]][%[[I]], %[[J]]] [8, 16] [1, 1]
-      // CHECK: memref.copy %[[ALLOC]], %[[T]]
+      // CHECK: memref.copy %[[C_SLICE]], %[[T]]
      %7 = tensor.insert_slice %6 into %arg6[%arg3, %arg5] [8, 16] [1, 1] :
          tensor<8x16xf32> into tensor<128x192xf32>
-      // CHECK: memref.dealloc %[[ALLOC]]
      scf.yield %7 : tensor<128x192xf32>
    }
    scf.yield %2 : tensor<128x192xf32>
  }
+  // CHECK: memref.dealloc %[[ALLOC]]
  return %0 : tensor<128x192xf32>
}
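
As a reading aid only (not part of the test or of this changeset), here is a minimal sketch of the buffer structure the updated CHECK lines describe: the preprocessing copy turns %C into one hoisted allocation, each tile becomes an in-place subview of that allocation, the tile result is copied back into the matching subview of %C, and the allocation is freed once after the loops. The scf.for nest is collapsed to a single tile, all names are hypothetical, and current upstream memref/linalg syntax is assumed.

// Illustrative sketch only; function and value names are hypothetical.
func.func @copy_preprocessing_sketch(%C: memref<128x192xf32>) {
  %cst = arith.constant 0.000000e+00 : f32
  // One hoisted allocation, initialized from %C once by the inserted copy.
  %alloc = memref.alloc() {alignment = 128 : i64} : memref<128x192xf32>
  memref.copy %C, %alloc : memref<128x192xf32> to memref<128x192xf32>
  // Inside the loops, each 8x16 tile is just a subview of %alloc, so
  // linalg.fill (and linalg.matmul in the real test) runs in place on it
  // instead of on a separate 8x16 allocation.
  %c_slice = memref.subview %alloc[0, 0] [8, 16] [1, 1]
      : memref<128x192xf32> to memref<8x16xf32, strided<[192, 1]>>
  linalg.fill ins(%cst : f32) outs(%c_slice : memref<8x16xf32, strided<[192, 1]>>)
  // The tile result is copied back into the matching subview of %C ...
  %t = memref.subview %C[0, 0] [8, 16] [1, 1]
      : memref<128x192xf32> to memref<8x16xf32, strided<[192, 1]>>
  memref.copy %c_slice, %t
      : memref<8x16xf32, strided<[192, 1]>> to memref<8x16xf32, strided<[192, 1]>>
  // ... and the hoisted allocation is deallocated once, after the loops.
  memref.dealloc %alloc : memref<128x192xf32>
  return
}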

// -----

/// This test just checks the produced IR is valid and does not have dominance
/// errors in the def-use chains.
[... 151 lines not shown ...]