Index: mlir/test/CMakeLists.txt
===================================================================
--- mlir/test/CMakeLists.txt
+++ mlir/test/CMakeLists.txt
@@ -29,6 +29,7 @@
 option(MLIR_RUN_CUDA_TENSOR_CORE_TESTS "Run CUDA Tensor core WMMA tests.")
 option(MLIR_RUN_CUDA_SM80_TESTS "Run CUDA A100 tests.")
 option(MLIR_RUN_ARM_SVE_TESTS "Run Arm SVE tests.")
+option(MLIR_RUN_ARM_SME_TESTS "Run Arm SME tests.")
 
 # The native target may not be enabled when cross compiling, raise an error.
@@ -52,6 +53,7 @@
   MLIR_RUN_CUDA_TENSOR_CORE_TESTS
   MLIR_RUN_X86VECTOR_TESTS
   MLIR_RUN_ARM_SVE_TESTS
+  MLIR_RUN_ARM_SME_TESTS
   MLIR_RUN_CUDA_SM80_TESTS
   )
Index: mlir/test/Integration/Dialect/Vector/CPU/ArmSME/lit.local.cfg
===================================================================
--- /dev/null
+++ mlir/test/Integration/Dialect/Vector/CPU/ArmSME/lit.local.cfg
@@ -0,0 +1,26 @@
+import sys
+
+# ArmSME tests must be enabled via build flag.
+if not config.mlir_run_arm_sme_tests:
+    config.unsupported = True
+
+# No JIT on win32.
+if sys.platform == 'win32':
+    config.unsupported = True
+
+lli_cmd = 'lli'
+if config.arm_emulator_lli_executable:
+    lli_cmd = config.arm_emulator_lli_executable
+
+config.substitutions.append(('%mlir_native_utils_lib_dir',
+                             config.arm_emulator_utils_lib_dir or config.mlir_lib_dir))
+
+if config.arm_emulator_executable:
+    # Run test in emulator (QEMU).
+    emulation_cmd = config.arm_emulator_executable
+    if config.arm_emulator_options:
+        emulation_cmd = emulation_cmd + ' ' + config.arm_emulator_options
+    emulation_cmd = emulation_cmd + ' ' + lli_cmd
+    config.substitutions.append(('%lli', emulation_cmd))
+else:
+    config.substitutions.append(('%lli', lli_cmd))
Index: mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-ssve.mlir
===================================================================
--- /dev/null
+++ mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-ssve.mlir
@@ -0,0 +1,65 @@
+// RUN: mlir-opt %s -test-lower-to-llvm | \
+// RUN: mlir-translate -mlir-to-llvmir | \
+// RUN: %lli --march=aarch64 --mattr="+sve,+sme" \
+// RUN:   -force-streaming-compatible-sve \
+// RUN:   --entry-function=entry \
+// RUN:   --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// NOTE: To run this test, your CPU must support SME.
+
+// VLA memcopy in streaming mode.
+func.func @streaming_kernel_copy(%src : memref<?xi64>, %dst : memref<?xi64>, %size : index) attributes {passthrough = ["aarch64_pstate_sm_enabled"]} {
+  %c0 = arith.constant 0 : index
+  %c2 = arith.constant 2 : index
+  %vscale = vector.vscale
+  %step = arith.muli %c2, %vscale : index
+  scf.for %i = %c0 to %size step %step {
+    %0 = vector.load %src[%i] : memref<?xi64>, vector<[2]xi64>
+    vector.store %0, %dst[%i] : memref<?xi64>, vector<[2]xi64>
+  }
+  return
+}
+
+func.func @entry() -> i32 {
+  %i0 = arith.constant 0: i64
+  %r0 = arith.constant 0: i32
+  %c0 = arith.constant 0: index
+  %c4 = arith.constant 4: index
+  %c32 = arith.constant 32: index
+
+  // Set up memory.
+  %a = memref.alloc() : memref<32xi64>
+  %a_copy = memref.alloc() : memref<32xi64>
+  %a_data = arith.constant dense<[ 1,  2,  3,  4,  5,  6,  7,  8,
+                                   9, 10, 11, 12, 13, 14, 15, 16,
+                                  17, 18, 19, 20, 21, 22, 23, 24,
+                                  25, 26, 27, 28, 29, 30, 31, 32]> : vector<32xi64>
+  vector.transfer_write %a_data, %a[%c0] : vector<32xi64>, memref<32xi64>
+
+  // Call kernel.
+  %0 = memref.cast %a : memref<32xi64> to memref<?xi64>
+  %1 = memref.cast %a_copy : memref<32xi64> to memref<?xi64>
+  call @streaming_kernel_copy(%0, %1, %c32) : (memref<?xi64>, memref<?xi64>, index) -> ()
+
+  // Print and verify.
+  //
+  // CHECK: ( 1, 2, 3, 4 )
+  // CHECK-NEXT: ( 5, 6, 7, 8 )
+  // CHECK-NEXT: ( 9, 10, 11, 12 )
+  // CHECK-NEXT: ( 13, 14, 15, 16 )
+  // CHECK-NEXT: ( 17, 18, 19, 20 )
+  // CHECK-NEXT: ( 21, 22, 23, 24 )
+  // CHECK-NEXT: ( 25, 26, 27, 28 )
+  // CHECK-NEXT: ( 29, 30, 31, 32 )
+  scf.for %i = %c0 to %c32 step %c4 {
+    %cv = vector.transfer_read %a_copy[%i], %i0 : memref<32xi64>, vector<4xi64>
+    vector.print %cv : vector<4xi64>
+  }
+
+  // Release resources.
+  memref.dealloc %a : memref<32xi64>
+  memref.dealloc %a_copy : memref<32xi64>
+
+  return %r0 : i32
+}
Index: mlir/test/Target/LLVMIR/arm-ssve.mlir
===================================================================
--- /dev/null
+++ mlir/test/Target/LLVMIR/arm-ssve.mlir
@@ -0,0 +1,11 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// Attribute to enable streaming mode.
+
+// CHECK-LABEL: @streaming_callee
+// CHECK: #[[ATTR:[0-9]*]]
+llvm.func @streaming_callee() attributes {passthrough = ["aarch64_pstate_sm_enabled"]} {
+  llvm.return
+}
+
+// CHECK: attributes #[[ATTR]] = { "aarch64_pstate_sm_enabled" }
Index: mlir/test/lit.site.cfg.py.in
===================================================================
--- mlir/test/lit.site.cfg.py.in
+++ mlir/test/lit.site.cfg.py.in
@@ -36,6 +36,7 @@
 config.intel_sde_executable = "@INTEL_SDE_EXECUTABLE@"
 config.mlir_run_amx_tests = @MLIR_RUN_AMX_TESTS@
 config.mlir_run_arm_sve_tests = @MLIR_RUN_ARM_SVE_TESTS@
+config.mlir_run_arm_sme_tests = @MLIR_RUN_ARM_SME_TESTS@
 config.mlir_run_x86vector_tests = @MLIR_RUN_X86VECTOR_TESTS@
 config.mlir_run_riscv_vector_tests = "@MLIR_RUN_RISCV_VECTOR_TESTS@"
 config.mlir_run_cuda_tensor_core_tests = @MLIR_RUN_CUDA_TENSOR_CORE_TESTS@
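
For reference, a minimal sketch of a CMake configuration that would enable these tests under emulation. MLIR_RUN_ARM_SME_TESTS is the flag added by this patch; the ARM_EMULATOR_* cache variables are assumed to be the existing emulator settings that feed the config.arm_emulator_* values read by lit.local.cfg above, and all paths are placeholders to adjust for the local QEMU / cross-built lli setup:

  cmake -G Ninja ../llvm \
    -DLLVM_ENABLE_PROJECTS=mlir \
    -DLLVM_TARGETS_TO_BUILD=AArch64 \
    -DMLIR_RUN_ARM_SME_TESTS=ON \
    -DARM_EMULATOR_EXECUTABLE=/path/to/qemu-aarch64 \
    -DARM_EMULATOR_OPTIONS="-cpu max" \
    -DARM_EMULATOR_LLI_EXECUTABLE=/path/to/aarch64-build/bin/lli \
    -DARM_EMULATOR_UTILS_LIB_DIR=/path/to/aarch64-build/lib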