diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
@@ -10,8 +10,8 @@
   // CHECK-LABEL:  spv.module Logical GLSL450
   // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
   gpu.module @kernels {
-    gpu.func @builtin_workgroup_id_x()
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
+    gpu.func @builtin_workgroup_id_x() kernel
+      attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
       // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
       // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
       // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
@@ -33,8 +33,8 @@
   // CHECK-LABEL:  spv.module Logical GLSL450
   // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
   gpu.module @kernels {
-    gpu.func @builtin_workgroup_id_y()
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
+    gpu.func @builtin_workgroup_id_y() kernel
+      attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
       // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
       // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
       // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
@@ -56,8 +56,8 @@
   // CHECK-LABEL:  spv.module Logical GLSL450
   // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
   gpu.module @kernels {
-    gpu.func @builtin_workgroup_id_z()
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
+    gpu.func @builtin_workgroup_id_z() kernel
+      attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
       // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
       // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
       // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
@@ -78,8 +78,8 @@
 
   // CHECK-LABEL:  spv.module Logical GLSL450
   gpu.module @kernels {
-    gpu.func @builtin_workgroup_size_x()
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 1, 1]>: vector<3xi32>}} {
+    gpu.func @builtin_workgroup_size_x() kernel
+      attributes {spv.entry_point_abi = {local_size = dense<[32, 1, 1]>: vector<3xi32>}} {
       // The constant value is obtained from the spv.entry_point_abi.
       // Note that this ignores the workgroup size specification in gpu.launch.
       // We may want to define gpu.workgroup_size and convert it to the entry
@@ -102,8 +102,8 @@
 
   // CHECK-LABEL:  spv.module Logical GLSL450
   gpu.module @kernels {
-    gpu.func @builtin_workgroup_size_y()
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} {
+    gpu.func @builtin_workgroup_size_y() kernel
+      attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} {
       // The constant value is obtained from the spv.entry_point_abi.
       // CHECK: spv.constant 4 : i32
       %0 = "gpu.block_dim"() {dimension = "y"} : () -> index
@@ -123,8 +123,8 @@
 
   // CHECK-LABEL:  spv.module Logical GLSL450
   gpu.module @kernels {
-    gpu.func @builtin_workgroup_size_z()
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} {
+    gpu.func @builtin_workgroup_size_z() kernel
+      attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} {
       // The constant value is obtained from the spv.entry_point_abi.
       // CHECK: spv.constant 1 : i32
       %0 = "gpu.block_dim"() {dimension = "z"} : () -> index
@@ -145,8 +145,8 @@
   // CHECK-LABEL:  spv.module Logical GLSL450
   // CHECK: spv.globalVariable [[LOCALINVOCATIONID:@.*]] built_in("LocalInvocationId")
   gpu.module @kernels {
-    gpu.func @builtin_local_id_x()
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
+    gpu.func @builtin_local_id_x() kernel
+      attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
       // CHECK: [[ADDRESS:%.*]] = spv._address_of [[LOCALINVOCATIONID]]
       // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
       // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
@@ -168,8 +168,8 @@
   // CHECK-LABEL:  spv.module Logical GLSL450
   // CHECK: spv.globalVariable [[NUMWORKGROUPS:@.*]] built_in("NumWorkgroups")
   gpu.module @kernels {
-    gpu.func @builtin_num_workgroups_x()
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
+    gpu.func @builtin_num_workgroups_x() kernel
+      attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
       // CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMWORKGROUPS]]
       // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
       // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
diff --git a/mlir/test/Conversion/GPUToSPIRV/if.mlir b/mlir/test/Conversion/GPUToSPIRV/if.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/if.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/if.mlir
@@ -15,8 +15,8 @@
 
   gpu.module @kernels {
     // CHECK-LABEL: @kernel_simple_selection
-    gpu.func @kernel_simple_selection(%arg2 : memref<10xf32>, %arg3 : i1)
-    attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
+    gpu.func @kernel_simple_selection(%arg2 : memref<10xf32>, %arg3 : i1) kernel
+    attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
       %value = constant 0.0 : f32
       %i = constant 0 : index
 
@@ -36,8 +36,8 @@
     }
 
     // CHECK-LABEL: @kernel_nested_selection
-    gpu.func @kernel_nested_selection(%arg3 : memref<10xf32>, %arg4 : memref<10xf32>, %arg5 : i1, %arg6 : i1)
-    attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
+    gpu.func @kernel_nested_selection(%arg3 : memref<10xf32>, %arg4 : memref<10xf32>, %arg5 : i1, %arg6 : i1) kernel
+    attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
       %i = constant 0 : index
       %j = constant 9 : index
 
diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
@@ -34,8 +34,8 @@
     // CHECK-SAME: [[ARG4:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 4), StorageBuffer>}
     // CHECK-SAME: [[ARG5:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 5), StorageBuffer>}
     // CHECK-SAME: [[ARG6:%.*]]: i32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 6), StorageBuffer>}
-    gpu.func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index)
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
+    gpu.func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index) kernel
+      attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
       // CHECK: [[ADDRESSWORKGROUPID:%.*]] = spv._address_of [[WORKGROUPIDVAR]]
       // CHECK: [[WORKGROUPID:%.*]] = spv.Load "Input" [[ADDRESSWORKGROUPID]]
       // CHECK: [[WORKGROUPIDX:%.*]] = spv.CompositeExtract [[WORKGROUPID]]{{\[}}0 : i32{{\]}}
diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
@@ -14,8 +14,8 @@
   }
 
   gpu.module @kernels {
-    gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>)
-    attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
+    gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>) kernel
+    attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
       // CHECK: [[LB:%.*]] = spv.constant 4 : i32
       %lb = constant 4 : index
       // CHECK: [[UB:%.*]] = spv.constant 42 : i32
diff --git a/mlir/test/Conversion/GPUToSPIRV/simple.mlir b/mlir/test/Conversion/GPUToSPIRV/simple.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/simple.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/simple.mlir
@@ -7,8 +7,8 @@
     // CHECK-SAME: {{%.*}}: f32 {spv.interface_var_abi = #spv.interface_var_abi<(0, 0), StorageBuffer>}
     // CHECK-SAME: {{%.*}}: !spv.ptr<!spv.struct<!spv.array<12 x f32, stride=4> [0]>, StorageBuffer> {spv.interface_var_abi = #spv.interface_var_abi<(0, 1)>}
     // CHECK-SAME: spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}
-    gpu.func @basic_module_structure(%arg0 : f32, %arg1 : memref<12xf32>)
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} {
+    gpu.func @basic_module_structure(%arg0 : f32, %arg1 : memref<12xf32>) kernel
+      attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} {
       // CHECK: spv.Return
       gpu.return
     }
@@ -30,7 +30,7 @@
   gpu.module @kernels {
     // expected-error @below {{failed to legalize operation 'gpu.func'}}
     // expected-remark @below {{match failure: missing 'spv.entry_point_abi' attribute}}
-    gpu.func @missing_entry_point_abi(%arg0 : f32, %arg1 : memref<12xf32>) attributes {gpu.kernel} {
+    gpu.func @missing_entry_point_abi(%arg0 : f32, %arg1 : memref<12xf32>) kernel {
       gpu.return
     }
   }
diff --git a/mlir/test/Dialect/GPU/all-reduce-max.mlir b/mlir/test/Dialect/GPU/all-reduce-max.mlir
--- a/mlir/test/Dialect/GPU/all-reduce-max.mlir
+++ b/mlir/test/Dialect/GPU/all-reduce-max.mlir
@@ -6,7 +6,7 @@
 
   // CHECK-LABEL: gpu.func @kernel(
   // CHECK-SAME: [[VAL_0:%.*]]: f32) workgroup([[VAL_1:%.*]] : memref<32xf32, 3>) kernel {
-  gpu.func @kernel(%arg0 : f32) attributes { gpu.kernel } {
+  gpu.func @kernel(%arg0 : f32) kernel {
     // CHECK:   [[VAL_2:%.*]] = constant 31 : i32
     // CHECK:   [[VAL_3:%.*]] = constant 0 : i32
     // CHECK:   [[VAL_4:%.*]] = constant 0 : index
diff --git a/mlir/test/Dialect/GPU/all-reduce.mlir b/mlir/test/Dialect/GPU/all-reduce.mlir
--- a/mlir/test/Dialect/GPU/all-reduce.mlir
+++ b/mlir/test/Dialect/GPU/all-reduce.mlir
@@ -6,7 +6,7 @@
 
   // CHECK-LABEL: gpu.func @kernel(
   // CHECK-SAME: [[VAL_0:%.*]]: f32) workgroup([[VAL_1:%.*]] : memref<32xf32, 3>) kernel {
-  gpu.func @kernel(%arg0 : f32) attributes { gpu.kernel } {
+  gpu.func @kernel(%arg0 : f32) kernel {
     // CHECK:   [[VAL_2:%.*]] = constant 31 : i32
     // CHECK:   [[VAL_3:%.*]] = constant 0 : i32
     // CHECK:   [[VAL_4:%.*]] = constant 0 : index
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -158,7 +158,7 @@
 
 module attributes {gpu.container_module} {
   gpu.module @kernels {
-    gpu.func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
+    gpu.func @kernel_1(%arg1 : !llvm<"float*">) kernel {
       gpu.return
     }
   }
@@ -177,7 +177,7 @@
 
 module attributes {gpu.container_module} {
   gpu.module @kernels {
-    gpu.func @kernel_1(%arg1 : f32) attributes { gpu.kernel } {
+    gpu.func @kernel_1(%arg1 : f32) kernel {
       gpu.return
     }
   }
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -27,7 +27,7 @@
   }
 
   gpu.module @kernels {
-    gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<?xf32, 1>) attributes {gpu.kernel} {
+    gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<?xf32, 1>) kernel {
       %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
       %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
       %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
@@ -59,7 +59,7 @@
       gpu.return
     }
 
-    gpu.func @kernel_2(%arg0: f32, %arg1: memref<?xf32, 1>) attributes {gpu.kernel} {
+    gpu.func @kernel_2(%arg0: f32, %arg1: memref<?xf32, 1>) kernel {
       gpu.return
     }
   }
diff --git a/mlir/test/mlir-vulkan-runner/addf.mlir b/mlir/test/mlir-vulkan-runner/addf.mlir
--- a/mlir/test/mlir-vulkan-runner/addf.mlir
+++ b/mlir/test/mlir-vulkan-runner/addf.mlir
@@ -10,7 +10,7 @@
 } {
   gpu.module @kernels {
     gpu.func @kernel_add(%arg0 : memref<8xf32>, %arg1 : memref<8xf32>, %arg2 : memref<8xf32>)
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>}} {
+      attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>} } kernel {
       %0 = "gpu.block_id"() {dimension = "x"} : () -> index
       %1 = load %arg0[%0] : memref<8xf32>
       %2 = load %arg1[%0] : memref<8xf32>
diff --git a/mlir/test/mlir-vulkan-runner/mulf.mlir b/mlir/test/mlir-vulkan-runner/mulf.mlir
--- a/mlir/test/mlir-vulkan-runner/mulf.mlir
+++ b/mlir/test/mlir-vulkan-runner/mulf.mlir
@@ -10,7 +10,7 @@
 } {
   gpu.module @kernels {
     gpu.func @kernel_mul(%arg0 : memref<4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<4x4xf32>)
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>}} {
+      attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>} } kernel {
       %x = "gpu.block_id"() {dimension = "x"} : () -> index
       %y = "gpu.block_id"() {dimension = "y"} : () -> index
       %1 = load %arg0[%x, %y] : memref<4x4xf32>
diff --git a/mlir/test/mlir-vulkan-runner/subf.mlir b/mlir/test/mlir-vulkan-runner/subf.mlir
--- a/mlir/test/mlir-vulkan-runner/subf.mlir
+++ b/mlir/test/mlir-vulkan-runner/subf.mlir
@@ -10,7 +10,7 @@
 } {
   gpu.module @kernels {
     gpu.func @kernel_sub(%arg0 : memref<8x4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<8x4x4xf32>)
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>}} {
+      attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>} } kernel {
       %x = "gpu.block_id"() {dimension = "x"} : () -> index
       %y = "gpu.block_id"() {dimension = "y"} : () -> index
       %z = "gpu.block_id"() {dimension = "z"} : () -> index
diff --git a/mlir/test/mlir-vulkan-runner/time.mlir b/mlir/test/mlir-vulkan-runner/time.mlir
--- a/mlir/test/mlir-vulkan-runner/time.mlir
+++ b/mlir/test/mlir-vulkan-runner/time.mlir
@@ -13,7 +13,7 @@
 } {
   gpu.module @kernels {
     gpu.func @kernel_add(%arg0 : memref<16384xf32>, %arg1 : memref<16384xf32>, %arg2 : memref<16384xf32>)
-      attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[128, 1, 1]>: vector<3xi32>}} {
+      attributes { spv.entry_point_abi = {local_size = dense<[128, 1, 1]>: vector<3xi32>} } kernel {
       %bid = "gpu.block_id"() {dimension = "x"} : () -> index
       %tid = "gpu.thread_id"() {dimension = "x"} : () -> index
       %cst = constant 128 : index