diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/Scalable/fill-1d.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/Scalable/fill-1d.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/Scalable/fill-1d.mlir
@@ -0,0 +1,62 @@
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -lower-vector-mask -one-shot-bufferize -test-lower-to-llvm | \
+// RUN: %mcr_aarch64_cmd -e=entry -entry-point-result=void --march=aarch64 --mattr="+sve" -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | \
+// RUN: FileCheck %s
+
+// Prints the end-of-test marker by passing the address of the first byte of
+// @str_sve_end to @printCString.
+func.func @printTestEnd() {
+  %0 = llvm.mlir.addressof @str_sve_end : !llvm.ptr<array<24 x i8>>
+  %1 = llvm.mlir.constant(0 : index) : i64
+  %2 = llvm.getelementptr %0[%1, %1]
+    : (!llvm.ptr<array<24 x i8>>, i64, i64) -> !llvm.ptr<i8>
+  llvm.call @printCString(%2) : (!llvm.ptr<i8>) -> ()
+  return
+}
+
+// Fills a tensor of 4 x vscale f32 elements with 3.14 and prints each element.
+func.func @entry() {
+  %c4 = arith.constant 4 : index
+  %c0 = arith.constant 0 : index
+  %step = arith.constant 1 : index
+  %c1_f32 = arith.constant 123.0 : f32
+
+  %vscale = vector.vscale
+  %vl_fp = arith.muli %c4, %vscale : index
+  %vec = bufferization.alloc_tensor(%vl_fp) : tensor<?xf32>
+
+  // Set every element to 123.0, a value distinct from the 3.14 filled in below.
+  %vec_out = scf.for %i = %c0 to %vl_fp step %step iter_args(%vin = %vec) -> tensor<?xf32> {
+    %vout = tensor.insert %c1_f32 into %vin[%i] : tensor<?xf32>
+    scf.yield %vout : tensor<?xf32>
+  }
+
+  %pi = arith.constant 3.14 : f32
+  %vec_out_1 = linalg.fill ins(%pi : f32) outs(%vec_out : tensor<?xf32>) -> tensor<?xf32>
+
+  // There are at least 4 f32 elements in every SVE vector. For implementations
+  // with wider vectors, you should see more elements being printed.
+  // CHECK: 3.14
+  // CHECK: 3.14
+  // CHECK: 3.14
+  // CHECK: 3.14
+  scf.for %i = %c0 to %vl_fp step %step {
+    %element = tensor.extract %vec_out_1[%i] : tensor<?xf32>
+    vector.print %element : f32
+  }
+
+  // CHECK: SVE: END OF TEST OUTPUT
+  func.call @printTestEnd() : () -> ()
+
+  return
+}
+
+// Match the linalg.fill op and vectorize it with masking. NOTE(review): the
+// inner brackets in [[4]] presumably mark the size as scalable - confirm.
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+  %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+  transform.structured.masked_vectorize %0 vector_sizes [[4]] : !transform.any_op
+}
+
+llvm.func @printCString(!llvm.ptr<i8>)
+llvm.mlir.global internal constant @str_sve_end("SVE: END OF TEST OUTPUT\0A")
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/Scalable/lit.local.cfg b/mlir/test/Integration/Dialect/Linalg/CPU/Scalable/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/Scalable/lit.local.cfg
@@ -0,0 +1,9 @@
+import sys
+
+# ArmSVE tests must be enabled via build flag.
+if not config.mlir_run_arm_sve_tests:
+    config.unsupported = True
+
+# No JIT on win32.
+if sys.platform == "win32":
+    config.unsupported = True