Changeset View
Changeset View
Standalone View
Standalone View
mlir/test/Transforms/affine-data-copy.mlir
// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-skip-non-unit-stride-loops | FileCheck %s | // RUN: mlir-opt %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-skip-non-unit-stride-loops | FileCheck %s | ||||
// Small buffer size to trigger fine copies. | // Small buffer size to trigger fine copies. | ||||
// RUN: mlir-opt %s -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-fast-mem-capacity=1 | FileCheck --check-prefix=CHECK-SMALL %s | // RUN: mlir-opt %s -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-fast-mem-capacity=1 | FileCheck --check-prefix=CHECK-SMALL %s | ||||
// Test affine data copy with a memref filter. We use a test pass that invokes | // Test affine data copy with a memref filter. We use a test pass that invokes | ||||
// affine data copy utility on the input loop nest. | // affine data copy utility on the input loop nest. | ||||
// '-test-affine-data-copy-memref-filter' passes the first memref found in an | // '-test-affine-data-copy-memref-filter' passes the first memref found in an | ||||
// affine.load op in the innermost loop as a filter. | // affine.load op in the innermost loop as a filter. | ||||
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='memref-filter=1' | FileCheck %s --check-prefix=FILTER | // RUN: mlir-opt %s -split-input-file -test-affine-data-copy='memref-filter' | FileCheck %s --check-prefix=FILTER | ||||
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='for-memref-region' | FileCheck %s --check-prefix=MEMREF_REGION | |||||
bondhugula: Do you need the '=1'? It could be confusing unless it's '=true'. | |||||
// -affine-data-copy-generate-skip-non-unit-stride-loops forces the copies to be placed right inside the | // -affine-data-copy-generate-skip-non-unit-stride-loops forces the copies to be placed right inside the | ||||
// tile space loops, avoiding the sensitivity of copy placement depth to memory | // tile space loops, avoiding the sensitivity of copy placement depth to memory | ||||
// footprint -- so that one could write a definite test case and not have to | // footprint -- so that one could write a definite test case and not have to | ||||
// update it each time something related to the cost functions changes. | // update it each time something related to the cost functions changes. | ||||
#map0 = affine_map<(d0) -> (d0)> | #map0 = affine_map<(d0) -> (d0)> | ||||
#map1 = affine_map<(d0) -> (d0 + 128)> | #map1 = affine_map<(d0) -> (d0 + 128)> | ||||
▲ Show 20 Lines • Show All 116 Lines • ▼ Show 20 Lines | |||||
// ----- | // ----- | ||||
// | // | ||||
// This test case will lead to single element buffers. These are eventually | // This test case will lead to single element buffers. These are eventually | ||||
// expected to be turned into registers via alloca and mem2reg. | // expected to be turned into registers via alloca and mem2reg. | ||||
// | // | ||||
// CHECK-SMALL-LABEL: func @foo | // CHECK-SMALL-LABEL: func @foo | ||||
// FILTER-LABEL: func @foo | // FILTER-LABEL: func @foo | ||||
// MEMREF_REGION-LABEL: func @foo | |||||
bondhugula: // MEMREF-REGION-LABEL here to be safe. | |||||
func @foo(%arg0: memref<1024x1024xf32>, %arg1: memref<1024x1024xf32>, %arg2: memref<1024x1024xf32>) -> memref<1024x1024xf32> { | func @foo(%arg0: memref<1024x1024xf32>, %arg1: memref<1024x1024xf32>, %arg2: memref<1024x1024xf32>) -> memref<1024x1024xf32> { | ||||
affine.for %i = 0 to 1024 { | affine.for %i = 0 to 1024 { | ||||
affine.for %j = 0 to 1024 { | affine.for %j = 0 to 1024 { | ||||
affine.for %k = 0 to 1024 { | affine.for %k = 0 to 1024 { | ||||
%6 = affine.load %arg1[%k, %j] : memref<1024x1024xf32> | %6 = affine.load %arg1[%k, %j] : memref<1024x1024xf32> | ||||
%7 = affine.load %arg2[%i, %j] : memref<1024x1024xf32> | %7 = affine.load %arg2[%i, %j] : memref<1024x1024xf32> | ||||
%9 = addf %6, %7 : f32 | %9 = addf %6, %7 : f32 | ||||
affine.store %9, %arg2[%i, %j] : memref<1024x1024xf32> | affine.store %9, %arg2[%i, %j] : memref<1024x1024xf32> | ||||
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | |||||
// FILTER-NOT: alloc() | // FILTER-NOT: alloc() | ||||
// FILTER: affine.for %{{.*}} = 0 to 1024 { | // FILTER: affine.for %{{.*}} = 0 to 1024 { | ||||
// FILTER: affine.for %{{.*}} = 0 to 1024 { | // FILTER: affine.for %{{.*}} = 0 to 1024 { | ||||
// FILTER: affine.for %{{.*}} = 0 to 1024 { | // FILTER: affine.for %{{.*}} = 0 to 1024 { | ||||
// FILTER-NEXT: affine.for %{{.*}} = 0 to 1024 { | // FILTER-NEXT: affine.for %{{.*}} = 0 to 1024 { | ||||
// FILTER-NEXT: affine.for %{{.*}} = 0 to 1024 { | // FILTER-NEXT: affine.for %{{.*}} = 0 to 1024 { | ||||
// FILTER: dealloc %{{.*}} : memref<1024x1024xf32> | // FILTER: dealloc %{{.*}} : memref<1024x1024xf32> | ||||
// FILTER-NOT: dealloc | // FILTER-NOT: dealloc | ||||
bondhugula: Perhaps a comment here on what is being checked for. | |||||
// Check that only one memref is copied, because for-memref-region is enabled | |||||
// (and the first load encountered is analyzed). | |||||
// MEMREF_REGION: alloc() : memref<1024x1024xf32> | |||||
// MEMREF_REGION-NOT: alloc() | |||||
// MEMREF_REGION: affine.for %{{.*}} = 0 to 1024 { | |||||
// MEMREF_REGION: affine.for %{{.*}} = 0 to 1024 { | |||||
// MEMREF_REGION: affine.for %{{.*}} = 0 to 1024 { | |||||
// MEMREF_REGION-NEXT: affine.for %{{.*}} = 0 to 1024 { | |||||
// MEMREF_REGION-NEXT: affine.for %{{.*}} = 0 to 1024 { | |||||
// MEMREF_REGION: dealloc %{{.*}} : memref<1024x1024xf32> | |||||
// MEMREF_REGION-NOT: dealloc |
Do you need the '=1'? It could be confusing unless it's '=true'.