diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -2148,6 +2148,9 @@
   fullyComposeAffineMapAndOperands(&bufAffineMap, &bufIndices);
 
   if (!copyOptions.generateDma) {
+    // Unable to generate copies for zero-ranked memrefs (no access points).
+    if (rank == 0)
+      return failure();
     // Point-wise copy generation.
     auto copyNest =
         generatePointWiseCopy(loc, memref, fastMemRef, lbMaps,
diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir
--- a/mlir/test/Dialect/Affine/affine-data-copy.mlir
+++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir
@@ -333,3 +333,51 @@
 // CHECK-NEXT: affine.for %{{.*}} = 0 to 8
   return
 }
+
+// CHECK-LABEL: func @scalar_memref_copy_without_dma
+func.func @scalar_memref_copy_without_dma() {
+  %false = arith.constant false
+  %4 = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  affine.store %false, %4[] : memref<i1>
+
+  // CHECK: %[[FALSE:.*]] = arith.constant false
+  // CHECK: %[[MEMREF:.*]] = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  // CHECK: affine.store %[[FALSE]], %[[MEMREF]][] : memref<i1>
+  return
+}
+
+// CHECK-LABEL: func @scalar_memref_copy_in_loop
+func.func @scalar_memref_copy_in_loop(%3: memref<480xi1>) {
+  %false = arith.constant false
+  %4 = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  affine.store %false, %4[] : memref<i1>
+  %5 = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  memref.copy %4, %5 : memref<i1> to memref<i1>
+  affine.for %arg0 = 0 to 480 {
+    %11 = affine.load %3[%arg0] : memref<480xi1>
+    %12 = affine.load %5[] : memref<i1>
+    %13 = arith.cmpi slt, %11, %12 : i1
+    %14 = arith.select %13, %11, %12 : i1
+    affine.store %14, %5[] : memref<i1>
+  }
+
+  // CHECK: %[[FALSE:.*]] = arith.constant false
+  // CHECK: %[[MEMREF:.*]] = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  // CHECK: affine.store %[[FALSE]], %[[MEMREF]][] : memref<i1>
+  // CHECK: %[[TARGET:.*]] = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  // CHECK: memref.copy %[[MEMREF]], %[[TARGET]] : memref<i1> to memref<i1>
+  // CHECK: %[[FAST_MEMREF:.*]] = memref.alloc() : memref<480xi1>
+  // CHECK: affine.for %{{.*}} = 0 to 480 {
+  // CHECK:   %{{.*}} = affine.load %arg0[%{{.*}}] : memref<480xi1>
+  // CHECK:   affine.store %{{.*}}, %[[FAST_MEMREF]][%{{.*}}] : memref<480xi1>
+  // CHECK: }
+  // CHECK: affine.for %arg1 = 0 to 480 {
+  // CHECK:   %[[L0:.*]] = affine.load %[[FAST_MEMREF]][%arg1] : memref<480xi1>
+  // CHECK:   %[[L1:.*]] = affine.load %[[TARGET]][] : memref<i1>
+  // CHECK:   %[[CMPI:.*]] = arith.cmpi slt, %[[L0]], %[[L1]] : i1
+  // CHECK:   %[[SELECT:.*]] = arith.select %[[CMPI]], %[[L0]], %[[L1]] : i1
+  // CHECK:   affine.store %[[SELECT]], %[[TARGET]][] : memref<i1>
+  // CHECK: }
+  // CHECK: memref.dealloc %[[FAST_MEMREF]] : memref<480xi1>
+  return
+}
\ No newline at end of file