diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
--- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
@@ -20,13 +20,14 @@
   func @foo(%buffer: memref<?xf32>) {
     %c8 = constant 8 : index
     %c32 = constant 32 : i32
-    "gpu.launch_func"(%c8, %c8, %c8, %c8, %c8, %c8, %c32, %buffer) {
-      kernel = @kernel_module::@kernel
-    } : (index, index, index, index, index, index, i32, memref<?xf32>) -> ()
+    gpu.launch_func @kernel_module::@kernel
+        blocks in (%c8, %c8, %c8)
+        threads in (%c8, %c8, %c8)
+        args(%c32 : i32, %buffer : memref<?xf32>)
     return
   }
 
-  // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : !llvm.i64   
+  // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : !llvm.i64
   // CHECK: [[ADDRESSOF:%.*]] = llvm.mlir.addressof @[[GLOBAL]]
   // CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : index)
   // CHECK: [[BINARY:%.*]] = llvm.getelementptr [[ADDRESSOF]]{{\[}}[[C0]], [[C0]]]
diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
@@ -3,7 +3,8 @@
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_id_x} : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@builtin_workgroup_id_x
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
     return
   }
 
@@ -26,7 +27,8 @@
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_id_y} : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@builtin_workgroup_id_y
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
     return
   }
 
@@ -49,7 +51,8 @@
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_id_z} : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@builtin_workgroup_id_z
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
     return
   }
 
@@ -72,7 +75,8 @@
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_size_x} : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@builtin_workgroup_size_x
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
     return
   }
 
@@ -96,7 +100,8 @@
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_size_y} : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@builtin_workgroup_size_y
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
     return
   }
 
@@ -117,7 +122,8 @@
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_size_z} : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@builtin_workgroup_size_z
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
     return
   }
 
@@ -138,7 +144,8 @@
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_local_id_x} : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@builtin_local_id_x
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
     return
   }
 
@@ -161,7 +168,8 @@
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_num_workgroups_x} : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@builtin_num_workgroups_x
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
     return
   }
 
diff --git a/mlir/test/Conversion/GPUToSPIRV/if.mlir b/mlir/test/Conversion/GPUToSPIRV/if.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/if.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/if.mlir
@@ -7,7 +7,9 @@
 } {
   func @main(%arg0 : memref<10xf32>, %arg1 : i1) {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0, %arg0, %arg1) { kernel = @kernels::@kernel_simple_selection} : (index, index, index, index, index, index, memref<10xf32>, i1) -> ()
+    gpu.launch_func @kernels::@kernel_simple_selection
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
+        args(%arg0 : memref<10xf32>, %arg1 : i1)
     return
   }
 
diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
@@ -15,7 +15,10 @@
     %1 = subi %c4, %c0_0 : index
     %c1_1 = constant 1 : index
     %c1_2 = constant 1 : index
-    "gpu.launch_func"(%0, %c1_2, %c1_2, %1, %c1_2, %c1_2, %arg0, %arg1, %arg2, %c0, %c0_0, %c1, %c1_1) {kernel = @kernels::@load_store_kernel} : (index, index, index, index, index, index, memref<12x4xf32>, memref<12x4xf32>, memref<12x4xf32>, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@load_store_kernel
+        blocks in (%0, %c1_2, %c1_2) threads in (%1, %c1_2, %c1_2)
+        args(%arg0 : memref<12x4xf32>, %arg1 : memref<12x4xf32>, %arg2 : memref<12x4xf32>, 
+             %c0 : index, %c0_0 : index, %c1 : index, %c1_1 : index)
     return
   }
 
diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
@@ -7,7 +7,9 @@
 } {
   func @loop(%arg0 : memref<10xf32>, %arg1 : memref<10xf32>) {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0, %arg0, %arg1) { kernel = @kernels::@loop_kernel} : (index, index, index, index, index, index, memref<10xf32>, memref<10xf32>) -> ()
+    gpu.launch_func @kernels::@loop_kernel
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
+        args(%arg0 : memref<10xf32>, %arg1 : memref<10xf32>)
     return
   }
 
diff --git a/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir b/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/module-structure-opencl.mlir
@@ -22,8 +22,9 @@
     %0 = "op"() : () -> (f32)
     %1 = "op"() : () -> (memref<12xf32, 11>)
     %cst = constant 1 : index
-    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernels::@basic_module_structure }
-        : (index, index, index, index, index, index, f32, memref<12xf32, 11>) -> ()
+    gpu.launch_func @kernels::@basic_module_structure
+        blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)
+        args(%0 : f32, %1 : memref<12xf32, 11>)
     return
   }
 }
diff --git a/mlir/test/Conversion/GPUToSPIRV/simple.mlir b/mlir/test/Conversion/GPUToSPIRV/simple.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/simple.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/simple.mlir
@@ -18,8 +18,9 @@
     %0 = "op"() : () -> (f32)
     %1 = "op"() : () -> (memref<12xf32>)
     %cst = constant 1 : index
-    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernels::@basic_module_structure }
-        : (index, index, index, index, index, index, f32, memref<12xf32>) -> ()
+    gpu.launch_func @kernels::@basic_module_structure
+        blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)
+        args(%0 : f32, %1 : memref<12xf32>)
     return
   }
 }
@@ -63,8 +64,9 @@
     %0 = "op"() : () -> (f32)
     %1 = "op"() : () -> (memref<12xf32>)
     %cst = constant 1 : index
-    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernels::@missing_entry_point_abi }
-        : (index, index, index, index, index, index, f32, memref<12xf32>) -> ()
+    gpu.launch_func @kernels::@missing_entry_point_abi
+        blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)
+        args(%0 : f32, %1 : memref<12xf32>)
     return
   }
 }
diff --git a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir b/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
--- a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
+++ b/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
@@ -26,7 +26,10 @@
   func @foo() {
     %0 = alloc() : memref<12xf32>
     %c1 = constant 1 : index
-    "gpu.launch_func"(%c1, %c1, %c1, %c1, %c1, %c1, %0) {kernel = @kernels::@kernel} : (index, index, index, index, index, index, memref<12xf32>) -> ()
+    gpu.launch_func @kernels::@kernel
+        blocks in(%c1, %c1, %c1)
+        threads in(%c1, %c1, %c1)
+        args(%0 : memref<12xf32>)
     return
   }
 }
diff --git a/mlir/test/mlir-vulkan-runner/addf.mlir b/mlir/test/mlir-vulkan-runner/addf.mlir
--- a/mlir/test/mlir-vulkan-runner/addf.mlir
+++ b/mlir/test/mlir-vulkan-runner/addf.mlir
@@ -37,8 +37,9 @@
 
     %cst1 = constant 1 : index
     %cst8 = constant 8 : index
-    "gpu.launch_func"(%cst8, %cst1, %cst1, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_add }
-        : (index, index, index, index, index, index, memref<8xf32>, memref<8xf32>, memref<8xf32>) -> ()
+    gpu.launch_func @kernels::@kernel_add
+        blocks in (%cst8, %cst1, %cst1) threads in (%cst1, %cst1, %cst1)
+        args(%arg0 : memref<8xf32>, %arg1 : memref<8xf32>, %arg2 : memref<8xf32>)
     %arg6 = memref_cast %arg5 : memref<?xf32> to memref<*xf32>
     call @print_memref_f32(%arg6) : (memref<*xf32>) -> ()
     return
diff --git a/mlir/test/mlir-vulkan-runner/addi.mlir b/mlir/test/mlir-vulkan-runner/addi.mlir
--- a/mlir/test/mlir-vulkan-runner/addi.mlir
+++ b/mlir/test/mlir-vulkan-runner/addi.mlir
@@ -36,8 +36,9 @@
 
     %cst1 = constant 1 : index
     %cst8 = constant 8 : index
-    "gpu.launch_func"(%cst8, %cst8, %cst8, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_addi }
-        : (index, index, index, index, index, index, memref<8xi32>, memref<8x8xi32>, memref<8x8x8xi32>) -> ()
+    gpu.launch_func @kernels::@kernel_addi
+        blocks in (%cst8, %cst8, %cst8) threads in (%cst1, %cst1, %cst1)
+        args(%arg0 : memref<8xi32>, %arg1 : memref<8x8xi32>, %arg2 : memref<8x8x8xi32>)
     %arg6 = memref_cast %arg5 : memref<?x?x?xi32> to memref<*xi32>
     call @print_memref_i32(%arg6) : (memref<*xi32>) -> ()
     return
diff --git a/mlir/test/mlir-vulkan-runner/addi8.mlir b/mlir/test/mlir-vulkan-runner/addi8.mlir
--- a/mlir/test/mlir-vulkan-runner/addi8.mlir
+++ b/mlir/test/mlir-vulkan-runner/addi8.mlir
@@ -37,8 +37,9 @@
 
     %cst1 = constant 1 : index
     %cst8 = constant 8 : index
-    "gpu.launch_func"(%cst8, %cst8, %cst8, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_addi }
-        : (index, index, index, index, index, index, memref<8xi8>, memref<8x8xi8>, memref<8x8x8xi32>) -> ()
+    gpu.launch_func @kernels::@kernel_addi
+        blocks in (%cst8, %cst8, %cst8) threads in (%cst1, %cst1, %cst1)
+        args(%arg0 : memref<8xi8>, %arg1 : memref<8x8xi8>, %arg2 : memref<8x8x8xi32>)
     %arg6 = memref_cast %arg5 : memref<?x?x?xi32> to memref<*xi32>
     call @print_memref_i32(%arg6) : (memref<*xi32>) -> ()
     return
diff --git a/mlir/test/mlir-vulkan-runner/mulf.mlir b/mlir/test/mlir-vulkan-runner/mulf.mlir
--- a/mlir/test/mlir-vulkan-runner/mulf.mlir
+++ b/mlir/test/mlir-vulkan-runner/mulf.mlir
@@ -38,8 +38,9 @@
 
     %cst1 = constant 1 : index
     %cst4 = constant 4 : index
-    "gpu.launch_func"(%cst4, %cst4, %cst1, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_mul }
-        : (index, index, index, index, index, index, memref<4x4xf32>, memref<4x4xf32>, memref<4x4xf32>) -> ()
+    gpu.launch_func @kernels::@kernel_mul
+        blocks in (%cst4, %cst4, %cst1) threads in(%cst1, %cst1, %cst1)
+        args(%arg0 : memref<4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<4x4xf32>)
     %arg6 = memref_cast %arg5 : memref<?x?xf32> to memref<*xf32>
     call @print_memref_f32(%arg6) : (memref<*xf32>) -> ()
     return
diff --git a/mlir/test/mlir-vulkan-runner/subf.mlir b/mlir/test/mlir-vulkan-runner/subf.mlir
--- a/mlir/test/mlir-vulkan-runner/subf.mlir
+++ b/mlir/test/mlir-vulkan-runner/subf.mlir
@@ -40,8 +40,9 @@
     %cst1 = constant 1 : index
     %cst4 = constant 4 : index
     %cst8 = constant 8 : index
-    "gpu.launch_func"(%cst8, %cst4, %cst4, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_sub }
-        : (index, index, index, index, index, index, memref<8x4x4xf32>, memref<4x4xf32>, memref<8x4x4xf32>) -> ()
+    gpu.launch_func @kernels::@kernel_sub
+        blocks in (%cst8, %cst4, %cst4) threads in (%cst1, %cst1, %cst1)
+        args(%arg0 : memref<8x4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<8x4x4xf32>)
     %arg6 = memref_cast %arg5 : memref<?x?x?xf32> to memref<*xf32>
     call @print_memref_f32(%arg6) : (memref<*xf32>) -> ()
     return
diff --git a/mlir/test/mlir-vulkan-runner/time.mlir b/mlir/test/mlir-vulkan-runner/time.mlir
--- a/mlir/test/mlir-vulkan-runner/time.mlir
+++ b/mlir/test/mlir-vulkan-runner/time.mlir
@@ -44,8 +44,9 @@
 
     %cst1 = constant 1 : index
     %cst128 = constant 128 : index
-    "gpu.launch_func"(%cst128, %cst1, %cst1, %cst128, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_add }
-        : (index, index, index, index, index, index, memref<16384xf32>, memref<16384xf32>, memref<16384xf32>) -> ()
+    gpu.launch_func @kernels::@kernel_add
+        blocks in (%cst128, %cst1, %cst1) threads in (%cst128, %cst1, %cst1)
+        args(%arg0 : memref<16384xf32>, %arg1 : memref<16384xf32>, %arg2 : memref<16384xf32>)
     %arg6 = memref_cast %arg5 : memref<?xf32> to memref<*xf32>
     return
   }