Index: mlir/test/CMakeLists.txt
===================================================================
--- mlir/test/CMakeLists.txt
+++ mlir/test/CMakeLists.txt
@@ -29,6 +29,7 @@
   option(MLIR_RUN_CUDA_TENSOR_CORE_TESTS "Run CUDA Tensor core WMMA tests.")
   option(MLIR_RUN_CUDA_SM80_TESTS "Run CUDA A100 tests.")
   option(MLIR_RUN_ARM_SVE_TESTS "Run Arm SVE tests.")
+  option(MLIR_RUN_ARM_SME_TESTS "Run Arm SME tests.")
 
 
   # The native target may not be enabled when cross compiling, raise an error.
@@ -52,6 +53,7 @@
   MLIR_RUN_CUDA_TENSOR_CORE_TESTS
   MLIR_RUN_X86VECTOR_TESTS
   MLIR_RUN_ARM_SVE_TESTS
+  MLIR_RUN_ARM_SME_TESTS
   MLIR_RUN_CUDA_SM80_TESTS
   )
 
Index: mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
===================================================================
--- mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
+++ mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
@@ -1,4 +1,5 @@
 import sys
+from lit.llvm import llvm_config
 
 # FIXME: %mlir_native_utils_lib_dir is set incorrectly on Windows
 if sys.platform == 'win32':
@@ -18,6 +19,15 @@
         config.substitutions.append(('%mlir_native_utils_lib_dir', config.mlir_lib_dir))
 
     if config.arm_emulator_executable:
+        if not config.arm_emulator_lli_executable:
+            # Top-level lit config adds llvm_tools_dir to PATH but this is lost
+            # when running under an emulator. If the user didn't specify an lli
+            # executable, use absolute path %llvm_tools_dir/lli.
+            # TODO(c-rhodes): This logic is duplicated across several Lit files
+            # and needs refactoring.
+            lli_cmd = llvm_config.use_llvm_tool('lli', search_env='LLI', required=True,
+                                                search_paths=[config.llvm_tools_dir],
+                                                use_installed=False)
         # Run test in emulator (qemu or armie).
         emulation_cmd = config.arm_emulator_executable
         if config.arm_emulator_options:
Index: mlir/test/Integration/Dialect/Vector/CPU/ArmSME/lit.local.cfg
===================================================================
--- /dev/null
+++ mlir/test/Integration/Dialect/Vector/CPU/ArmSME/lit.local.cfg
@@ -0,0 +1,36 @@
+import sys
+from lit.llvm import llvm_config
+
+# ArmSME tests must be enabled via the MLIR_RUN_ARM_SME_TESTS build option.
+if not config.mlir_run_arm_sme_tests:
+    config.unsupported = True
+
+# No JIT on win32.
+if sys.platform == 'win32':
+    config.unsupported = True
+
+lli_cmd = 'lli'  # Default: bare tool name, expected to be found via PATH.
+if config.arm_emulator_lli_executable:
+    lli_cmd = config.arm_emulator_lli_executable  # User-specified lli wins.
+
+config.substitutions.append(('%mlir_native_utils_lib_dir',
+    config.arm_emulator_utils_lib_dir or config.mlir_lib_dir))
+
+if config.arm_emulator_executable:
+    if not config.arm_emulator_lli_executable:
+        # Top-level lit config adds llvm_tools_dir to PATH but this is lost
+        # when running under an emulator. If the user didn't specify an lli
+        # executable, use absolute path %llvm_tools_dir/lli.
+        # TODO(c-rhodes): This logic is duplicated across several Lit files and
+        # needs refactoring.
+        lli_cmd = llvm_config.use_llvm_tool('lli', search_env='LLI', required=True,
+                                            search_paths=[config.llvm_tools_dir],
+                                            use_installed=False)
+    # Run tests under the emulator: %lli becomes '<emulator> [<options>] <lli>'.
+    emulation_cmd = config.arm_emulator_executable
+    if config.arm_emulator_options:
+        emulation_cmd = emulation_cmd + ' ' + config.arm_emulator_options
+    emulation_cmd = emulation_cmd + ' ' + lli_cmd
+    config.substitutions.append(('%lli', emulation_cmd))
+else:  # No emulator configured: %lli expands to lli_cmd alone.
+    config.substitutions.append(('%lli', lli_cmd))
Index: mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-ssve.mlir
===================================================================
--- /dev/null
+++ mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-ssve.mlir
@@ -0,0 +1,65 @@
+// RUN: mlir-opt %s -test-lower-to-llvm | \
+// RUN: mlir-translate -mlir-to-llvmir | \
+// RUN: %lli --march=aarch64 --mattr="+sve,+sme" \
+// RUN:      -force-streaming-compatible-sve \
+// RUN:      --entry-function=entry \
+// RUN:      --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// NOTE: To run this test, your CPU (or the configured emulator) must support SME.
+
+// VLA (vector-length agnostic) copy of %size i64 elements from %src to %dst in streaming mode.
+func.func @streaming_kernel_copy(%src : memref<?xi64>, %dst : memref<?xi64>, %size : index) attributes {passthrough = ["aarch64_pstate_sm_enabled"]} {
+  %c0 = arith.constant 0 : index
+  %c2 = arith.constant 2 : index
+  %vscale = vector.vscale
+  %step = arith.muli %c2, %vscale : index  // Elements in one vector<[2]xi64>.
+  scf.for %i = %c0 to %size step %step {  // NOTE(review): no tail handling; assumes %size is a multiple of %step.
+    %0 = vector.load %src[%i] : memref<?xi64>, vector<[2]xi64>
+    vector.store %0, %dst[%i] : memref<?xi64>, vector<[2]xi64>
+  }
+  return
+}
+
+func.func @entry() -> i32 {
+  %i0 = arith.constant 0: i64  // Padding value for vector.transfer_read.
+  %r0 = arith.constant 0: i32  // Value returned by @entry.
+  %c0 = arith.constant 0: index
+  %c4 = arith.constant 4: index
+  %c32 = arith.constant 32: index
+
+  // Set up memory: %a is filled with 1..32, %a_copy is the copy destination.
+  %a = memref.alloc()      : memref<32xi64>
+  %a_copy = memref.alloc() : memref<32xi64>
+  %a_data = arith.constant dense<[1 , 2,  3 , 4 , 5,  6,  7,  8,
+                                  9, 10, 11, 12, 13, 14, 15, 16,
+                                  17, 18, 19, 20, 21, 22, 23, 24,
+                                  25, 26, 27, 28, 29, 30, 31, 32]> : vector<32xi64>
+  vector.transfer_write %a_data, %a[%c0] : vector<32xi64>, memref<32xi64>
+
+  // Call kernel on dynamically-sized views of both buffers.
+  %0 = memref.cast %a : memref<32xi64> to memref<?xi64>
+  %1 = memref.cast %a_copy : memref<32xi64> to memref<?xi64>
+  call @streaming_kernel_copy(%0, %1, %c32) : (memref<?xi64>, memref<?xi64>, index) -> ()
+
+  // Print and verify.
+  // Each loop iteration below prints four consecutive elements of %a_copy.
+  // CHECK:      ( 1, 2, 3, 4 )
+  // CHECK-NEXT: ( 5, 6, 7, 8 )
+  // CHECK-NEXT: ( 9, 10, 11, 12 )
+  // CHECK-NEXT: ( 13, 14, 15, 16 )
+  // CHECK-NEXT: ( 17, 18, 19, 20 )
+  // CHECK-NEXT: ( 21, 22, 23, 24 )
+  // CHECK-NEXT: ( 25, 26, 27, 28 )
+  // CHECK-NEXT: ( 29, 30, 31, 32 )
+  scf.for %i = %c0 to %c32 step %c4 {
+    %cv = vector.transfer_read %a_copy[%i], %i0 : memref<32xi64>, vector<4xi64>
+    vector.print %cv : vector<4xi64>
+  }
+
+  // Release resources.
+  memref.dealloc %a      : memref<32xi64>
+  memref.dealloc %a_copy : memref<32xi64>
+
+  return %r0 : i32
+}
Index: mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/lit.local.cfg
===================================================================
--- mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/lit.local.cfg
+++ mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/lit.local.cfg
@@ -1,4 +1,5 @@
 import sys
+from lit.llvm import llvm_config
 
 # ArmSVE tests must be enabled via build flag.
 if not config.mlir_run_arm_sve_tests:
@@ -16,6 +17,15 @@
     config.arm_emulator_utils_lib_dir or config.mlir_lib_dir))
 
 if config.arm_emulator_executable:
+    if not config.arm_emulator_lli_executable:
+        # Top-level lit config adds llvm_tools_dir to PATH but this is lost
+        # when running under an emulator. If the user didn't specify an lli
+        # executable, use absolute path %llvm_tools_dir/lli.
+        # TODO(c-rhodes): This logic is duplicated across several Lit files and
+        # needs refactoring.
+        lli_cmd = llvm_config.use_llvm_tool('lli', search_env='LLI', required=True,
+                                            search_paths=[config.llvm_tools_dir],
+                                            use_installed=False)
     # Run test in emulator (qemu or armie)
     emulation_cmd = config.arm_emulator_executable
     if config.arm_emulator_options:
Index: mlir/test/Target/LLVMIR/arm-ssve.mlir
===================================================================
--- /dev/null
+++ mlir/test/Target/LLVMIR/arm-ssve.mlir
@@ -0,0 +1,11 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// The streaming-mode passthrough attribute is emitted as an LLVM function attribute.
+
+// CHECK-LABEL: @streaming_callee
+// CHECK: #[[ATTR:[0-9]*]]
+llvm.func @streaming_callee() attributes {passthrough = ["aarch64_pstate_sm_enabled"]} {
+  llvm.return
+}
+
+// CHECK: attributes #[[ATTR]] = { "aarch64_pstate_sm_enabled" }
Index: mlir/test/lit.site.cfg.py.in
===================================================================
--- mlir/test/lit.site.cfg.py.in
+++ mlir/test/lit.site.cfg.py.in
@@ -36,6 +36,7 @@
 config.intel_sde_executable = "@INTEL_SDE_EXECUTABLE@"
 config.mlir_run_amx_tests = @MLIR_RUN_AMX_TESTS@
 config.mlir_run_arm_sve_tests = @MLIR_RUN_ARM_SVE_TESTS@
+config.mlir_run_arm_sme_tests = @MLIR_RUN_ARM_SME_TESTS@
 config.mlir_run_x86vector_tests = @MLIR_RUN_X86VECTOR_TESTS@
 config.mlir_run_riscv_vector_tests = "@MLIR_RUN_RISCV_VECTOR_TESTS@"
 config.mlir_run_cuda_tensor_core_tests = @MLIR_RUN_CUDA_TENSOR_CORE_TESTS@