# Patch summary
# -------------
# Touches the AMX integration tests under
# mlir/test/Integration/Dialect/Vector/CPU/AMX/:
#
#   * test-mulf.mlir, test-muli.mlir, test-tilezero.mlir:
#     spell `alloc`, `dealloc` and `store` with their `memref.` dialect prefix
#     (`memref.alloc`, `memref.dealloc`, `memref.store`).
#   * test-tilezero-block.mlir (new file):
#     fills a 4x32 f32 buffer with 1.0, then zeroes it one 2x16 tile at a time
#     with amx.tile_zero + amx.tile_store, printing the whole buffer after each
#     tile store; the CHECK lines pin the four intermediate states.
#
# Reconstruction notes
# --------------------
#   * This patch was recovered from a whitespace-collapsed copy. Line breaks and
#     blank lines were restored so that every hunk matches its `@@` line counts,
#     but the original run lengths of spaces inside context lines are unknown.
#     Apply with whitespace-insensitive context matching, e.g.
#     `git apply --ignore-whitespace` or `patch -l`.
#   * In the test-tilezero.mlir hunks the memref type parameters had been
#     stripped (bare `memref`). Rank 2 with dynamic sizes follows from
#     `alloc(%c19, %c19)` and the `[%i, %j]` indexing; the element type `i32` is
#     ASSUMED from the stored value `%i1` -- TODO confirm against the target file.

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir
@@ -38,9 +38,9 @@
   %c2 = constant 2: index
 
   // Set up memory.
-  %a = alloc() : memref<2x4xbf16>
-  %b = alloc() : memref<2x4xbf16>
-  %c = alloc() : memref<2x2xf32>
+  %a = memref.alloc() : memref<2x4xbf16>
+  %b = memref.alloc() : memref<2x4xbf16>
+  %c = memref.alloc() : memref<2x2xf32>
 
   %0 = std.constant dense<[[1.0, 2.0, 3.0, 4.0 ],
                            [5.0, 6.0, 7.0, 8.0 ]]> : vector<2x4xbf16>
@@ -75,9 +75,9 @@
   }
 
   // Release resources.
-  dealloc %a : memref<2x4xbf16>
-  dealloc %b : memref<2x4xbf16>
-  dealloc %c : memref<2x2xf32>
+  memref.dealloc %a : memref<2x4xbf16>
+  memref.dealloc %b : memref<2x4xbf16>
+  memref.dealloc %c : memref<2x2xf32>
 
   return
 }
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir
@@ -38,9 +38,9 @@
   %c2 = constant 2: index
 
   // Set up memory.
-  %a = alloc() : memref<2x8xi8>
-  %b = alloc() : memref<2x8xi8>
-  %c = alloc() : memref<2x2xi32>
+  %a = memref.alloc() : memref<2x8xi8>
+  %b = memref.alloc() : memref<2x8xi8>
+  %c = memref.alloc() : memref<2x2xi32>
 
   %0 = std.constant dense<[[1 , 2, 3 , 4 , 5, 6, 7, 8],
                            [9, 10, 11, 12, 13, 14, 15, 16]]> : vector<2x8xi8>
@@ -75,9 +75,9 @@
   }
 
   // Release resources.
-  dealloc %a : memref<2x8xi8>
-  dealloc %b : memref<2x8xi8>
-  dealloc %c : memref<2x2xi32>
+  memref.dealloc %a : memref<2x8xi8>
+  memref.dealloc %b : memref<2x8xi8>
+  memref.dealloc %c : memref<2x2xi32>
 
   return
 }
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero-block.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero-block.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero-block.mlir
@@ -0,0 +1,81 @@
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm="enable-amx" -convert-std-to-llvm | \
+// RUN: mlir-translate -mlir-to-llvmir | \
+// RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// Note: To run this test, your CPU must support AMX.
+
+func @print(%arg0: memref<4x32xf32>) {
+  %fu = constant -1.0: f32
+  %c0 = constant 0: index
+  %c1 = constant 1: index
+  %c4 = constant 4: index
+  scf.for %i = %c0 to %c4 step %c1 {
+    %0 = vector.transfer_read %arg0[%i, %c0], %fu: memref<4x32xf32>, vector<32xf32>
+    vector.print %0 : vector<32xf32>
+  }
+  return
+}
+
+func @kernel(%arg0: memref<4x32xf32>) {
+  %c0 = constant 0: index
+  %c2 = constant 2 : index
+  %c4 = constant 4 : index
+  %c16 = constant 16 : index
+  %c32 = constant 32 : index
+  scf.for %i = %c0 to %c4 step %c2 {
+    scf.for %j = %c0 to %c32 step %c16 {
+      %0 = amx.tile_zero : vector<2x16xf32>
+      amx.tile_store %arg0[%i, %j], %0 : memref<4x32xf32>, vector<2x16xf32>
+      call @print(%arg0) : (memref<4x32xf32>) -> ()
+    }
+  }
+  return
+}
+
+func @entry() {
+  %f1 = constant 1.0: f32
+  %c0 = constant 0: index
+  %c1 = constant 1: index
+  %c4 = constant 4 : index
+  %c32 = constant 32 : index
+
+  // Set up memory.
+  %a = memref.alloc() : memref<4x32xf32>
+  scf.for %i = %c0 to %c4 step %c1 {
+    scf.for %j = %c0 to %c32 step %c1 {
+      memref.store %f1, %a[%i, %j] : memref<4x32xf32>
+    }
+  }
+
+  // Call kernel.
+  call @kernel(%a) : (memref<4x32xf32>) -> ()
+
+  // Verify progress of blocked tilezero.
+  //
+  // CHECK: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 )
+  // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 )
+  // CHECK-NEXT: ( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 )
+  // CHECK-NEXT: ( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 )
+  //
+  // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+  // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+  // CHECK-NEXT: ( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 )
+  // CHECK-NEXT: ( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 )
+  //
+  // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+  // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+  // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 )
+  // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 )
+  //
+  // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+  // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+  // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+  // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+  //
+
+  // Release resources.
+  memref.dealloc %a : memref<4x32xf32>
+
+  return
+}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero.mlir
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-tilezero.mlir
@@ -20,10 +20,10 @@
   %c19 = constant 19: index
 
   // Set up memory.
-  %a = alloc(%c19, %c19) : memref<?x?xi32>
+  %a = memref.alloc(%c19, %c19) : memref<?x?xi32>
   scf.for %i = %c0 to %c19 step %c1 {
     scf.for %j = %c0 to %c19 step %c1 {
-      store %i1, %a[%i, %j] : memref<?x?xi32>
+      memref.store %i1, %a[%i, %j] : memref<?x?xi32>
     }
   }
 
@@ -90,7 +90,7 @@
   }
 
   // Release resources.
-  dealloc %a : memref<?x?xi32>
+  memref.dealloc %a : memref<?x?xi32>
 
   return
 }