diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -42,6 +42,7 @@
 
 set(MLIR_CUDA_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir CUDA runner")
 set(MLIR_ROCM_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir ROCm runner")
+set(MLIR_SPIRV_CPU_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir SPIR-V cpu runner")
 set(MLIR_VULKAN_RUNNER_ENABLED 0 CACHE BOOL "Enable building the mlir Vulkan runner")
 
 option(MLIR_INCLUDE_TESTS
diff --git a/mlir/examples/toy/Ch6/toyc.cpp b/mlir/examples/toy/Ch6/toyc.cpp
--- a/mlir/examples/toy/Ch6/toyc.cpp
+++ b/mlir/examples/toy/Ch6/toyc.cpp
@@ -226,7 +226,8 @@
 
   // Create an MLIR execution engine. The execution engine eagerly JIT-compiles
   // the module.
-  auto maybeEngine = mlir::ExecutionEngine::create(module, optPipeline);
+  auto maybeEngine = mlir::ExecutionEngine::create(
+      module, /*llvmModuleBuilder=*/nullptr, optPipeline);
   assert(maybeEngine && "failed to construct an execution engine");
   auto &engine = maybeEngine.get();
 
diff --git a/mlir/examples/toy/Ch7/toyc.cpp b/mlir/examples/toy/Ch7/toyc.cpp
--- a/mlir/examples/toy/Ch7/toyc.cpp
+++ b/mlir/examples/toy/Ch7/toyc.cpp
@@ -227,7 +227,8 @@
 
   // Create an MLIR execution engine. The execution engine eagerly JIT-compiles
   // the module.
-  auto maybeEngine = mlir::ExecutionEngine::create(module, optPipeline);
+  auto maybeEngine = mlir::ExecutionEngine::create(
+      module, /*llvmModuleBuilder=*/nullptr, optPipeline);
   assert(maybeEngine && "failed to construct an execution engine");
   auto &engine = maybeEngine.get();
 
diff --git a/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h b/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h
--- a/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h
+++ b/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h
@@ -65,6 +65,10 @@
 
   /// Creates an execution engine for the given module.
   ///
+  /// If `llvmModuleBuilder` is provided, it will be used to create LLVM module
+  /// from the given MLIR module. Otherwise, a default `translateModuleToLLVMIR`
+  /// function will be used to translate MLIR module to LLVM IR.
+  ///
   /// If `transformer` is provided, it will be called on the LLVM module during
   /// JIT-compilation and can be used, e.g., for reporting or optimization.
   ///
@@ -84,6 +88,9 @@
   /// the llvm's global Perf notification listener.
   static llvm::Expected<std::unique_ptr<ExecutionEngine>>
   create(ModuleOp m,
+         llvm::function_ref<std::unique_ptr<llvm::Module>(ModuleOp,
+                                                          llvm::LLVMContext &)>
+             llvmModuleBuilder = nullptr,
          llvm::function_ref<llvm::Error(llvm::Module *)> transformer = {},
          Optional<llvm::CodeGenOpt::Level> jitCodeGenOptLevel = llvm::None,
          ArrayRef<StringRef> sharedLibPaths = {}, bool enableObjectCache = true,
diff --git a/mlir/include/mlir/ExecutionEngine/JitRunner.h b/mlir/include/mlir/ExecutionEngine/JitRunner.h
--- a/mlir/include/mlir/ExecutionEngine/JitRunner.h
+++ b/mlir/include/mlir/ExecutionEngine/JitRunner.h
@@ -18,20 +18,29 @@
 #ifndef MLIR_SUPPORT_JITRUNNER_H_
 #define MLIR_SUPPORT_JITRUNNER_H_
 
+#include "mlir/IR/Module.h"
+
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Module.h"
 
 namespace mlir {
 
+using TranslationCallback = llvm::function_ref<std::unique_ptr<llvm::Module>(
+    ModuleOp, llvm::LLVMContext &)>;
+
 class ModuleOp;
 struct LogicalResult;
 
 // Entry point for all CPU runners. Expects the common argc/argv arguments for
-// standard C++ main functions and an mlirTransformer.
-// The latter is applied after parsing the input into MLIR IR and before passing
-// the MLIR module to the ExecutionEngine.
+// standard C++ main functions, `mlirTransformer` and `llvmModuleBuilder`.
+/// `mlirTransformer` is applied after parsing the input into MLIR IR and before
+/// passing the MLIR module to the ExecutionEngine.
+/// `llvmModuleBuilder` is a custom function that is passed to ExecutionEngine.
+/// It processes MLIR module and creates LLVM IR module.
 int JitRunnerMain(
     int argc, char **argv,
-    llvm::function_ref<LogicalResult(mlir::ModuleOp)> mlirTransformer);
+    llvm::function_ref<LogicalResult(mlir::ModuleOp)> mlirTransformer,
+    TranslationCallback llvmModuleBuilder = nullptr);
 
 } // namespace mlir
 
diff --git a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
--- a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -214,7 +214,11 @@
                        : nullptr) {}
 
 Expected<std::unique_ptr<ExecutionEngine>> ExecutionEngine::create(
-    ModuleOp m, llvm::function_ref<Error(llvm::Module *)> transformer,
+    ModuleOp m,
+    llvm::function_ref<std::unique_ptr<llvm::Module>(ModuleOp,
+                                                     llvm::LLVMContext &)>
+        llvmModuleBuilder,
+    llvm::function_ref<Error(llvm::Module *)> transformer,
     Optional<llvm::CodeGenOpt::Level> jitCodeGenOptLevel,
     ArrayRef<StringRef> sharedLibPaths, bool enableObjectCache,
     bool enableGDBNotificationListener, bool enablePerfNotificationListener) {
@@ -223,7 +227,8 @@
       enablePerfNotificationListener);
 
   std::unique_ptr<llvm::LLVMContext> ctx(new llvm::LLVMContext);
-  auto llvmModule = translateModuleToLLVMIR(m, *ctx);
+  auto llvmModule = llvmModuleBuilder ? llvmModuleBuilder(m, *ctx)
+                                      : translateModuleToLLVMIR(m, *ctx);
   if (!llvmModule)
     return make_string_error("could not convert to LLVM IR");
   // FIXME: the triple should be passed to the translation or dialect conversion
diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp
--- a/mlir/lib/ExecutionEngine/JitRunner.cpp
+++ b/mlir/lib/ExecutionEngine/JitRunner.cpp
@@ -20,7 +20,6 @@
 #include "mlir/ExecutionEngine/ExecutionEngine.h"
 #include "mlir/ExecutionEngine/OptUtils.h"
 #include "mlir/IR/MLIRContext.h"
-#include "mlir/IR/Module.h"
 #include "mlir/IR/StandardTypes.h"
 #include "mlir/InitAllDialects.h"
 #include "mlir/Parser.h"
@@ -31,7 +30,6 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassNameParser.h"
-#include "llvm/IR/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/SourceMgr.h"
@@ -134,7 +132,8 @@
 
 // JIT-compile the given module and run "entryPoint" with "args" as arguments.
 static Error
-compileAndExecute(Options &options, ModuleOp module, StringRef entryPoint,
+compileAndExecute(Options &options, ModuleOp module,
+                  TranslationCallback llvmModuleBuilder, StringRef entryPoint,
                   std::function<llvm::Error(llvm::Module *)> transformer,
                   void **args) {
   Optional<llvm::CodeGenOpt::Level> jitCodeGenOptLevel;
@@ -143,8 +142,8 @@
         static_cast<llvm::CodeGenOpt::Level>(clOptLevel.getValue());
   SmallVector<StringRef, 4> libs(options.clSharedLibs.begin(),
                                  options.clSharedLibs.end());
-  auto expectedEngine = mlir::ExecutionEngine::create(module, transformer,
-                                                      jitCodeGenOptLevel, libs);
+  auto expectedEngine = mlir::ExecutionEngine::create(
+      module, llvmModuleBuilder, transformer, jitCodeGenOptLevel, libs);
   if (!expectedEngine)
     return expectedEngine.takeError();
 
@@ -165,13 +164,15 @@
 }
 
 static Error compileAndExecuteVoidFunction(
-    Options &options, ModuleOp module, StringRef entryPoint,
+    Options &options, ModuleOp module, TranslationCallback llvmModuleBuilder,
+    StringRef entryPoint,
     std::function<llvm::Error(llvm::Module *)> transformer) {
   auto mainFunction = module.lookupSymbol<LLVM::LLVMFuncOp>(entryPoint);
   if (!mainFunction || mainFunction.empty())
     return make_string_error("entry point not found");
   void *empty = nullptr;
-  return compileAndExecute(options, module, entryPoint, transformer, &empty);
+  return compileAndExecute(options, module, llvmModuleBuilder, entryPoint,
+                           transformer, &empty);
 }
 
 template <typename Type>
@@ -196,7 +197,8 @@
 }
 template <typename Type>
 Error compileAndExecuteSingleReturnFunction(
-    Options &options, ModuleOp module, StringRef entryPoint,
+    Options &options, ModuleOp module, TranslationCallback llvmModuleBuilder,
+    StringRef entryPoint,
     std::function<llvm::Error(llvm::Module *)> transformer) {
   auto mainFunction = module.lookupSymbol<LLVM::LLVMFuncOp>(entryPoint);
   if (!mainFunction || mainFunction.isExternal())
@@ -213,8 +215,8 @@
     void *data;
   } data;
   data.data = &res;
-  if (auto error = compileAndExecute(options, module, entryPoint, transformer,
-                                     (void **)&data))
+  if (auto error = compileAndExecute(options, module, llvmModuleBuilder,
+                                     entryPoint, transformer, (void **)&data))
     return error;
 
   // Intentional printing of the output so we can test.
@@ -223,13 +225,16 @@
   return Error::success();
 }
 
-/// Entry point for all CPU runners. Expects the common argc/argv
-/// arguments for standard C++ main functions and an mlirTransformer.
-/// The latter is applied after parsing the input into MLIR IR and
-/// before passing the MLIR module to the ExecutionEngine.
+/// Entry point for all CPU runners. Expects the common argc/argv arguments for
+/// standard C++ main functions, `mlirTransformer` and `llvmModuleBuilder`.
+/// `mlirTransformer` is applied after parsing the input into MLIR IR and before
+/// passing the MLIR module to the ExecutionEngine.
+/// `llvmModuleBuilder` is a custom function that is passed to ExecutionEngine.
+/// It processes MLIR module and creates LLVM IR module.
 int mlir::JitRunnerMain(
     int argc, char **argv,
-    function_ref<LogicalResult(mlir::ModuleOp)> mlirTransformer) {
+    function_ref<LogicalResult(mlir::ModuleOp)> mlirTransformer,
+    TranslationCallback llvmModuleBuilder) {
   // Create the options struct containing the command line options for the
   // runner. This must come before the command line options are parsed.
   Options options;
@@ -289,7 +294,7 @@
 
   // Get the function used to compile and execute the module.
   using CompileAndExecuteFnT =
-      Error (*)(Options &, ModuleOp, StringRef,
+      Error (*)(Options &, ModuleOp, TranslationCallback, StringRef,
                 std::function<llvm::Error(llvm::Module *)>);
   auto compileAndExecuteFn =
       StringSwitch<CompileAndExecuteFnT>(options.mainFuncType.getValue())
@@ -301,7 +306,7 @@
 
   Error error =
       compileAndExecuteFn
-          ? compileAndExecuteFn(options, m.get(),
+          ? compileAndExecuteFn(options, m.get(), llvmModuleBuilder,
                                 options.mainFuncName.getValue(), transformer)
           : make_string_error("unsupported function type");
 
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -20,9 +20,10 @@
 set(MLIR_RUNNER_UTILS_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
 
 # Passed to lit.site.cfg.py.in to set up the path where to find the libraries
-# for the mlir cuda / rocm / vulkan runner tests.
+# for the mlir cuda / rocm / spirv / vulkan runner tests.
 set(MLIR_CUDA_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
 set(MLIR_ROCM_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
+set(MLIR_SPIRV_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
 set(MLIR_VULKAN_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
 
 configure_lit_site_cfg(
@@ -81,6 +82,14 @@
   )
 endif()
 
+if(MLIR_SPIRV_CPU_RUNNER_ENABLED)
+  add_subdirectory(mlir-spirv-cpu-runner)
+  list(APPEND MLIR_TEST_DEPENDS
+    mlir-spirv-cpu-runner
+    mlir_test_spirv_cpu_runner_c_wrappers
+  )
+endif()
+
 if(MLIR_VULKAN_RUNNER_ENABLED)
   list(APPEND MLIR_TEST_DEPENDS
     mlir-vulkan-runner
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -74,6 +74,7 @@
     ToolSubst('%linalg_test_lib_dir', config.linalg_test_lib_dir, unresolved='ignore'),
     ToolSubst('%mlir_runner_utils_dir', config.mlir_runner_utils_dir, unresolved='ignore'),
     ToolSubst('%rocm_wrapper_library_dir', config.rocm_wrapper_library_dir, unresolved='ignore'),
+    ToolSubst('%spirv_wrapper_library_dir', config.spirv_wrapper_library_dir, unresolved='ignore'),
     ToolSubst('%vulkan_wrapper_library_dir', config.vulkan_wrapper_library_dir, unresolved='ignore'),
 ])
 
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -41,6 +41,8 @@
 config.run_rocm_tests = @MLIR_ROCM_CONVERSIONS_ENABLED@
 config.rocm_wrapper_library_dir = "@MLIR_ROCM_WRAPPER_LIBRARY_DIR@"
 config.enable_rocm_runner = @MLIR_ROCM_RUNNER_ENABLED@
+config.spirv_wrapper_library_dir = "@MLIR_SPIRV_WRAPPER_LIBRARY_DIR@"
+config.enable_spirv_cpu_runner = @MLIR_SPIRV_CPU_RUNNER_ENABLED@
 config.vulkan_wrapper_library_dir = "@MLIR_VULKAN_WRAPPER_LIBRARY_DIR@"
 config.enable_vulkan_runner = @MLIR_VULKAN_RUNNER_ENABLED@
 config.enable_bindings_python = @MLIR_BINDINGS_PYTHON_ENABLED@
diff --git a/mlir/test/mlir-spirv-cpu-runner/CMakeLists.txt b/mlir/test/mlir-spirv-cpu-runner/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/mlir/test/mlir-spirv-cpu-runner/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(LLVM_OPTIONAL_SOURCES
+  mlir_test_spirv_cpu_runner_c_wrappers.cpp
+  )
+
+add_llvm_library(mlir_test_spirv_cpu_runner_c_wrappers SHARED mlir_test_spirv_cpu_runner_c_wrappers.cpp)
+target_compile_definitions(mlir_test_spirv_cpu_runner_c_wrappers PRIVATE mlir_test_spirv_cpu_runner_c_wrappers_EXPORTS)
diff --git a/mlir/test/mlir-spirv-cpu-runner/double.mlir b/mlir/test/mlir-spirv-cpu-runner/double.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/mlir-spirv-cpu-runner/double.mlir
@@ -0,0 +1,68 @@
+// RUN: mlir-spirv-cpu-runner %s -e main --entry-point-result=void --shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%spirv_wrapper_library_dir/libmlir_test_spirv_cpu_runner_c_wrappers%shlibext
+
+// CHECK: [8,  8,  8,  8,  8,  8]
+module attributes {
+  gpu.container_module,
+  spv.target_env = #spv.target_env<
+    #spv.vce<v1.0, [Shader], [SPV_KHR_variable_pointers]>,
+    {max_compute_workgroup_invocations = 128 : i32,
+     max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>
+} {
+  gpu.module @kernels {
+    gpu.func @double(%arg0 : memref<6xi32>, %arg1 : memref<6xi32>)
+      kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>}} {
+      %factor = constant 2 : i32
+
+      %i0 = constant 0 : index
+      %i1 = constant 1 : index
+      %i2 = constant 2 : index
+      %i3 = constant 3 : index
+      %i4 = constant 4 : index
+      %i5 = constant 5 : index
+
+      %x0 = load %arg0[%i0] : memref<6xi32>
+      %x1 = load %arg0[%i1] : memref<6xi32>
+      %x2 = load %arg0[%i2] : memref<6xi32>
+      %x3 = load %arg0[%i3] : memref<6xi32>
+      %x4 = load %arg0[%i4] : memref<6xi32>
+      %x5 = load %arg0[%i5] : memref<6xi32>
+
+      %y0 = muli %x0, %factor : i32
+      %y1 = muli %x1, %factor : i32
+      %y2 = muli %x2, %factor : i32
+      %y3 = muli %x3, %factor : i32
+      %y4 = muli %x4, %factor : i32
+      %y5 = muli %x5, %factor : i32
+
+      store %y0, %arg1[%i0] : memref<6xi32>
+      store %y1, %arg1[%i1] : memref<6xi32>
+      store %y2, %arg1[%i2] : memref<6xi32>
+      store %y3, %arg1[%i3] : memref<6xi32>
+      store %y4, %arg1[%i4] : memref<6xi32>
+      store %y5, %arg1[%i5] : memref<6xi32>
+      gpu.return
+    }
+  }
+  func @main() {
+    %input = alloc() : memref<6xi32>
+    %output = alloc() : memref<6xi32>
+    %four = constant 4 : i32
+    %zero = constant 0 : i32
+    %input_casted = memref_cast %input : memref<6xi32> to memref<?xi32>
+    %output_casted = memref_cast %output : memref<6xi32> to memref<?xi32>
+    call @fillI32Buffer(%input_casted, %four) : (memref<?xi32>, i32) -> ()
+    call @fillI32Buffer(%output_casted, %zero) : (memref<?xi32>, i32) -> ()
+
+    %one = constant 1 : index
+    "gpu.launch_func"(%one, %one, %one,
+                      %one, %one, %one,
+                      %input, %output) { kernel = @kernels::@double }
+        : (index, index, index, index, index, index, memref<6xi32>, memref<6xi32>) -> ()
+    %result = memref_cast %output : memref<6xi32> to memref<*xi32>
+    call @print_memref_i32(%result) : (memref<*xi32>) -> ()
+    return
+  }
+
+  func @fillI32Buffer(%arg0 : memref<?xi32>, %arg1 : i32)
+  func @print_memref_i32(%ptr : memref<*xi32>)
+}
diff --git a/mlir/test/mlir-spirv-cpu-runner/lit.local.cfg b/mlir/test/mlir-spirv-cpu-runner/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/mlir/test/mlir-spirv-cpu-runner/lit.local.cfg
@@ -0,0 +1,8 @@
+import sys
+
+# FIXME: llvm orc does not support the COFF rtld.
+if sys.platform == 'win32':
+    config.unsupported = True
+
+if not config.enable_spirv_cpu_runner:
+  config.unsupported = True
diff --git a/mlir/test/mlir-spirv-cpu-runner/mlir_test_spirv_cpu_runner_c_wrappers.cpp b/mlir/test/mlir-spirv-cpu-runner/mlir_test_spirv_cpu_runner_c_wrappers.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/test/mlir-spirv-cpu-runner/mlir_test_spirv_cpu_runner_c_wrappers.cpp
@@ -0,0 +1,38 @@
+//===- mlir_test_spirv_cpu_runner_c_wrappers.cpp - Runner testing library -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A small library for SPIR-V cpu runner testing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/ExecutionEngine/RunnerUtils.h"
+
+extern "C" void
+_mlir_ciface_fillI32Buffer(StridedMemRefType<int32_t, 1> *mem_ref,
+                           int32_t value) {
+  std::fill_n(mem_ref->basePtr, mem_ref->sizes[0], value);
+}
+
+extern "C" void
+_mlir_ciface_fillF32Buffer1D(StridedMemRefType<float, 1> *mem_ref,
+                             float value) {
+  std::fill_n(mem_ref->basePtr, mem_ref->sizes[0], value);
+}
+
+extern "C" void
+_mlir_ciface_fillF32Buffer2D(StridedMemRefType<float, 2> *mem_ref,
+                             float value) {
+  std::fill_n(mem_ref->basePtr, mem_ref->sizes[0] * mem_ref->sizes[1], value);
+}
+
+extern "C" void
+_mlir_ciface_fillF32Buffer3D(StridedMemRefType<float, 3> *mem_ref,
+                             float value) {
+  std::fill_n(mem_ref->basePtr,
+              mem_ref->sizes[0] * mem_ref->sizes[1] * mem_ref->sizes[2], value);
+}
diff --git a/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir b/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir
@@ -0,0 +1,62 @@
+// RUN: mlir-spirv-cpu-runner %s -e main --entry-point-result=void --shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%spirv_wrapper_library_dir/libmlir_test_spirv_cpu_runner_c_wrappers%shlibext
+
+// CHECK: [[[7.7,    0,    0], [7.7,    0,    0], [7.7,    0,    0]], [[0,    7.7,    0], [0,    7.7,    0], [0,    7.7,    0]], [[0,    0,    7.7], [0,    0,    7.7], [0,    0,    7.7]]]
+module attributes {
+  gpu.container_module,
+  spv.target_env = #spv.target_env<
+    #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class, SPV_KHR_8bit_storage]>,
+    {max_compute_workgroup_invocations = 128 : i32,
+     max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>
+} {
+  gpu.module @kernels {
+    gpu.func @sum(%arg0 : memref<3xf32>, %arg1 : memref<3x3xf32>, %arg2 :  memref<3x3x3xf32>)
+      kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>}} {
+      %i0 = constant 0 : index
+      %i1 = constant 1 : index
+      %i2 = constant 2 : index
+
+      %x = load %arg0[%i0] : memref<3xf32>
+      %y = load %arg1[%i0, %i0] : memref<3x3xf32>
+      %sum = addf %x, %y : f32
+
+      store %sum, %arg2[%i0, %i0, %i0] : memref<3x3x3xf32>
+      store %sum, %arg2[%i0, %i1, %i0] : memref<3x3x3xf32>
+      store %sum, %arg2[%i0, %i2, %i0] : memref<3x3x3xf32>
+      store %sum, %arg2[%i1, %i0, %i1] : memref<3x3x3xf32>
+      store %sum, %arg2[%i1, %i1, %i1] : memref<3x3x3xf32>
+      store %sum, %arg2[%i1, %i2, %i1] : memref<3x3x3xf32>
+      store %sum, %arg2[%i2, %i0, %i2] : memref<3x3x3xf32>
+      store %sum, %arg2[%i2, %i1, %i2] : memref<3x3x3xf32>
+      store %sum, %arg2[%i2, %i2, %i2] : memref<3x3x3xf32>
+      gpu.return
+    }
+  }
+
+  func @main() {
+    %input1 = alloc() : memref<3xf32>
+    %input2 = alloc() : memref<3x3xf32>
+    %output = alloc() : memref<3x3x3xf32>
+    %0 = constant 0.0 : f32
+    %3 = constant 3.4 : f32
+    %4 = constant 4.3 : f32
+    %input1_casted = memref_cast %input1 : memref<3xf32> to memref<?xf32>
+    %input2_casted = memref_cast %input2 : memref<3x3xf32> to memref<?x?xf32>
+    %output_casted = memref_cast %output : memref<3x3x3xf32> to memref<?x?x?xf32>
+    call @fillF32Buffer1D(%input1_casted, %3) : (memref<?xf32>, f32) -> ()
+    call @fillF32Buffer2D(%input2_casted, %4) : (memref<?x?xf32>, f32) -> ()
+    call @fillF32Buffer3D(%output_casted, %0) : (memref<?x?x?xf32>, f32) -> ()
+
+    %one = constant 1 : index
+    "gpu.launch_func"(%one, %one, %one,
+                      %one, %one, %one,
+                      %input1, %input2, %output) { kernel = @kernels::@sum }
+        : (index, index, index, index, index, index, memref<3xf32>, memref<3x3xf32>, memref<3x3x3xf32>) -> ()
+    %result = memref_cast %output : memref<3x3x3xf32> to memref<*xf32>
+    call @print_memref_f32(%result) : (memref<*xf32>) -> ()
+    return
+  }
+  func @fillF32Buffer1D(%arg0 : memref<?xf32>, %arg1 : f32)
+  func @fillF32Buffer2D(%arg0 : memref<?x?xf32>, %arg1 : f32)
+  func @fillF32Buffer3D(%arg0 : memref<?x?x?xf32>, %arg1 : f32)
+  func @print_memref_f32(%arg0 : memref<*xf32>)
+}
diff --git a/mlir/tools/CMakeLists.txt b/mlir/tools/CMakeLists.txt
--- a/mlir/tools/CMakeLists.txt
+++ b/mlir/tools/CMakeLists.txt
@@ -5,5 +5,6 @@
 add_subdirectory(mlir-reduce)
 add_subdirectory(mlir-rocm-runner)
 add_subdirectory(mlir-shlib)
+add_subdirectory(mlir-spirv-cpu-runner)
 add_subdirectory(mlir-translate)
 add_subdirectory(mlir-vulkan-runner)
\ No newline at end of file
diff --git a/mlir/tools/mlir-spirv-cpu-runner/CMakeLists.txt b/mlir/tools/mlir-spirv-cpu-runner/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/mlir/tools/mlir-spirv-cpu-runner/CMakeLists.txt
@@ -0,0 +1,32 @@
+set(LLVM_OPTIONAL_SOURCES
+  mlir-spirv-cpu-runner.cpp
+  )
+
+if (MLIR_SPIRV_CPU_RUNNER_ENABLED)
+  message(STATUS "Building SPIR-V cpu runner")
+
+  add_llvm_tool(mlir-spirv-cpu-runner
+    mlir-spirv-cpu-runner.cpp
+  )
+
+  llvm_update_compile_flags(mlir-spirv-cpu-runner)
+
+  get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
+  get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+
+  target_link_libraries(mlir-spirv-cpu-runner PRIVATE
+    ${conversion_libs}
+    ${dialect_libs}
+    MLIRAnalysis
+    MLIREDSC
+    MLIRExecutionEngine
+    MLIRIR
+    MLIRJitRunner
+    MLIRLLVMIR
+    MLIRParser
+    MLIRTargetLLVMIR
+    MLIRTransforms
+    MLIRTranslation
+    MLIRSupport
+  )
+endif()
diff --git a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp
@@ -0,0 +1,90 @@
+//===- mlir-spirv-cpu-runner.cpp - MLIR SPIR-V Execution on CPU -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Main entry point to a command line utility that executes an MLIR file on the
+// CPU by translating MLIR GPU module and host part to LLVM IR before
+// JIT-compiling and executing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h"
+#include "mlir/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVMPass.h"
+#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
+#include "mlir/Dialect/GPU/Passes.h"
+#include "mlir/Dialect/SPIRV/Passes.h"
+#include "mlir/Dialect/SPIRV/SPIRVOps.h"
+#include "mlir/ExecutionEngine/JitRunner.h"
+#include "mlir/ExecutionEngine/OptUtils.h"
+#include "mlir/InitAllDialects.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Target/LLVMIR.h"
+
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/TargetSelect.h"
+
+using namespace mlir;
+
+/// A utility function that builds llvm::Module from two nested MLIR modules.
+///
+/// module @main {
+///   module @kernel {
+///     // Some ops
+///   }
+///   // Some other ops
+/// }
+///
+/// Each of these two modules is translated to LLVM IR module, then they are
+/// linked together and returned.
+static std::unique_ptr<llvm::Module>
+convertMLIRModule(ModuleOp module, llvm::LLVMContext &context) {
+  // Verify that there is only one nested module.
+  auto modules = module.getOps<ModuleOp>();
+  if (!llvm::hasSingleElement(modules)) {
+    module.emitError("The module must contain exactly one nested module");
+    return nullptr;
+  }
+
+  // Translate nested module and erase it.
+  ModuleOp nested = *modules.begin();
+  std::unique_ptr<llvm::Module> kernelModule =
+      translateModuleToLLVMIR(nested, context);
+  nested.erase();
+
+  std::unique_ptr<llvm::Module> mainModule =
+      translateModuleToLLVMIR(module, context);
+  llvm::Linker::linkModules(*mainModule, std::move(kernelModule));
+  return mainModule;
+}
+
+static LogicalResult runMLIRPasses(ModuleOp module) {
+  PassManager passManager(module.getContext());
+  applyPassManagerCLOptions(passManager);
+  passManager.addPass(createGpuKernelOutliningPass());
+  passManager.addPass(createConvertGPUToSPIRVPass());
+
+  OpPassManager &nestedPM = passManager.nest<spirv::ModuleOp>();
+  nestedPM.addPass(spirv::createLowerABIAttributesPass());
+  nestedPM.addPass(spirv::createUpdateVersionCapabilityExtensionPass());
+  passManager.addPass(createLowerHostCodeToLLVMPass());
+  passManager.addPass(createConvertSPIRVToLLVMPass());
+  return passManager.run(module);
+}
+
+int main(int argc, char **argv) {
+  llvm::InitLLVM y(argc, argv);
+
+  llvm::InitializeNativeTarget();
+  llvm::InitializeNativeTargetAsmPrinter();
+  mlir::initializeLLVMPasses();
+
+  return mlir::JitRunnerMain(argc, argv, &runMLIRPasses, &convertMLIRModule);
+}