diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -18,10 +18,6 @@
 include "mlir/IR/SymbolInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 
-// Type constraint accepting standard integers, indices.
-def IntOrIndex : TypeConstraint<
-  Or<[AnySignlessInteger.predicate, Index.predicate]>, "integer or index">;
-
 //===----------------------------------------------------------------------===//
 // GPU Dialect operations.
 //===----------------------------------------------------------------------===//
@@ -296,9 +292,9 @@
 }
 
 def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
-    Arguments<(ins IntOrIndex:$gridSizeX, IntOrIndex:$gridSizeY,
-               IntOrIndex:$gridSizeZ, IntOrIndex:$blockSizeX,
-               IntOrIndex:$blockSizeY, IntOrIndex:$blockSizeZ,
+    Arguments<(ins SymbolRefAttr:$kernel,
+               Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
+               Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
                Variadic<AnyType>:$operands)>,
     Results<(outs)> {
   let summary = "Launches a function as a GPU kernel";
@@ -312,8 +308,8 @@
     function is required to be a gpu.module. And finally, the module containing
     the kernel module (which thus cannot be the top-level module) is required
     to have the `gpu.container_module` attribute. The `gpu.launch_func`
-    operation has a symbol attribute named `kernel` to identify the fully specified
-    kernel function to launch (both the gpu.module and func).
+    operation has a symbol attribute named `kernel` to identify the fully
+    specified kernel function to launch (both the gpu.module and func).
 
     The operation takes at least six operands, with the first three operands
     being grid sizes along x,y,z dimensions and the following three being block
@@ -321,8 +317,6 @@
     unused sizes must be explicitly set to `1`. The remaining operands are
     passed as arguments to the kernel function.
 
-    A custom syntax for this operation is currently not available.
-
     Example:
 
     ```mlir
@@ -357,13 +351,11 @@
         }
       }
 
-      "gpu.launch_func"(%cst, %cst, %cst,  // Grid sizes.
-                        %cst, %cst, %cst,  // Block sizes.
-                        %arg0, %arg1)      // Arguments passed to the kernel.
-            { kernel_module = @kernels,    // Module containing the kernel.
-              kernel = "kernel_1" }        // Kernel function.
-            : (index, index, index, index, index, index, f32, memref<?xf32, 1>)
-              -> ()
+      gpu.launch_func
+          @kernels::@kernel_1                          // Kernel function.
+          blocks in (%cst, %cst, %cst)                 // Grid size.
+          threads in (%cst, %cst, %cst)                // Block size.
+          args(%arg0 : f32, %arg1 : memref<?xf32, 1>)  // Kernel arguments.
     }
     ```
   }];
@@ -371,19 +363,12 @@
   let skipDefaultBuilders = 1;
 
   let builders = [
-    OpBuilder<"GPUFuncOp kernelFunc, "
-              "Value gridSizeX, Value gridSizeY, Value gridSizeZ, "
-              "Value blockSizeX, Value blockSizeY, Value blockSizeZ, "
-              "ValueRange kernelOperands">,
     OpBuilder<"GPUFuncOp kernelFunc, "
               "KernelDim3 gridSize, KernelDim3 blockSize, "
               "ValueRange kernelOperands">
   ];
 
   let extraClassDeclaration = [{
-    /// The kernel function specified by the operation's `kernel` attribute.
-    SymbolRefAttr kernel();
-
     /// The number of operands passed to the kernel function.
     unsigned getNumKernelOperands();
 
@@ -416,6 +401,13 @@
   }];
 
   let verifier = [{ return ::verify(*this); }];
+  let assemblyFormat = [{
+      $kernel
+      `blocks` `in` ` ` `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)`
+      `threads` `in` ` ` `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)`
+      custom<LaunchFuncOperands>($operands, type($operands))
+      attr-dict
+  }];
 }
 
 def GPU_LaunchOp : GPU_Op<"launch">,
diff --git a/mlir/include/mlir/IR/FunctionImplementation.h b/mlir/include/mlir/IR/FunctionImplementation.h
--- a/mlir/include/mlir/IR/FunctionImplementation.h
+++ b/mlir/include/mlir/IR/FunctionImplementation.h
@@ -49,6 +49,16 @@
 using FuncTypeBuilder = function_ref<Type(
     Builder &, ArrayRef<Type>, ArrayRef<Type>, VariadicFlag, std::string &)>;
 
+/// Parses function arguments using `parser`. `allowAttributes` and
+/// `allowVariadic` indicate whether argument attributes and variadic
+/// arguments are accepted. The trailing arguments are populated with names,
+/// types and attributes of the arguments, and whether the list is variadic.
+ParseResult parseFunctionArgumentList(
+    OpAsmParser &parser, bool allowAttributes, bool allowVariadic,
+    SmallVectorImpl<OpAsmParser::OperandType> &argNames,
+    SmallVectorImpl<Type> &argTypes, SmallVectorImpl<NamedAttrList> &argAttrs,
+    bool &isVariadic);
+
 /// Parses a function signature using `parser`. The `allowVariadic` argument
 /// indicates whether functions with variadic arguments are supported. The
 /// trailing arguments are populated by this function with names, types and
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -428,12 +428,11 @@
 //===----------------------------------------------------------------------===//
 
 void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
-                         GPUFuncOp kernelFunc, Value gridSizeX, Value gridSizeY,
-                         Value gridSizeZ, Value blockSizeX, Value blockSizeY,
-                         Value blockSizeZ, ValueRange kernelOperands) {
+                         GPUFuncOp kernelFunc, KernelDim3 gridSize,
+                         KernelDim3 blockSize, ValueRange kernelOperands) {
   // Add grid and block sizes as op operands, followed by the data operands.
-  result.addOperands(
-      {gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ});
+  result.addOperands({gridSize.x, gridSize.y, gridSize.z, blockSize.x,
+                      blockSize.y, blockSize.z});
   result.addOperands(kernelOperands);
   auto kernelModule = kernelFunc.getParentOfType<GPUModuleOp>();
   auto kernelSymbol = builder.getSymbolRefAttr(
@@ -441,17 +440,6 @@
   result.addAttribute(getKernelAttrName(), kernelSymbol);
 }
 
-void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
-                         GPUFuncOp kernelFunc, KernelDim3 gridSize,
-                         KernelDim3 blockSize, ValueRange kernelOperands) {
-  build(builder, result, kernelFunc, gridSize.x, gridSize.y, gridSize.z,
-        blockSize.x, blockSize.y, blockSize.z, kernelOperands);
-}
-
-SymbolRefAttr LaunchFuncOp::kernel() {
-  return getAttrOfType<SymbolRefAttr>(getKernelAttrName());
-}
-
 unsigned LaunchFuncOp::getNumKernelOperands() {
   return getNumOperands() - kNumConfigOperands;
 }
@@ -492,6 +480,33 @@
   return success();
 }
 
+static ParseResult
+parseLaunchFuncOperands(OpAsmParser &parser,
+                        SmallVectorImpl<OpAsmParser::OperandType> &argNames,
+                        SmallVectorImpl<Type> &argTypes) {
+  if (parser.parseOptionalKeyword("args")) // `args(...)` clause is optional.
+    return success(); // No keyword: leave argNames/argTypes untouched.
+  SmallVector<NamedAttrList, 4> argAttrs; // Callee out-param; unused here.
+  bool isVariadic = false; // Callee out-param; variadics are disallowed.
+  return impl::parseFunctionArgumentList(parser, /*allowAttributes=*/false,
+                                         /*allowVariadic=*/false, argNames,
+                                         argTypes, argAttrs, isVariadic);
+}
+
+static void printLaunchFuncOperands(OpAsmPrinter &printer,
+                                    OperandRange operands, TypeRange types) {
+  if (operands.empty()) // Omit the whole `args(...)` clause if empty.
+    return;
+  printer << "args("; // Same keyword parseLaunchFuncOperands looks for.
+  llvm::interleaveComma(llvm::zip(operands, types), printer,
+                        [&](const auto &pair) {
+                          printer.printOperand(std::get<0>(pair));
+                          printer << " : "; // `operand : type` per element.
+                          printer.printType(std::get<1>(pair));
+                        });
+  printer << ")";
+}
+
 //===----------------------------------------------------------------------===//
 // GPUFuncOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/IR/FunctionImplementation.cpp b/mlir/lib/IR/FunctionImplementation.cpp
--- a/mlir/lib/IR/FunctionImplementation.cpp
+++ b/mlir/lib/IR/FunctionImplementation.cpp
@@ -13,11 +13,11 @@
 
 using namespace mlir;
 
-static ParseResult
-parseArgumentList(OpAsmParser &parser, bool allowVariadic,
-                  SmallVectorImpl<Type> &argTypes,
-                  SmallVectorImpl<OpAsmParser::OperandType> &argNames,
-                  SmallVectorImpl<NamedAttrList> &argAttrs, bool &isVariadic) {
+ParseResult mlir::impl::parseFunctionArgumentList(
+    OpAsmParser &parser, bool allowAttributes, bool allowVariadic,
+    SmallVectorImpl<OpAsmParser::OperandType> &argNames,
+    SmallVectorImpl<Type> &argTypes, SmallVectorImpl<NamedAttrList> &argAttrs,
+    bool &isVariadic) {
   if (parser.parseLParen())
     return failure();
 
@@ -56,6 +56,8 @@
     NamedAttrList attrs;
     if (parser.parseOptionalAttrDict(attrs))
       return failure();
+    if (!allowAttributes && !attrs.empty())
+      return parser.emitError(loc, "expected arguments without attributes");
     argAttrs.push_back(attrs);
     return success();
   };
@@ -129,8 +131,9 @@
     SmallVectorImpl<Type> &argTypes, SmallVectorImpl<NamedAttrList> &argAttrs,
     bool &isVariadic, SmallVectorImpl<Type> &resultTypes,
     SmallVectorImpl<NamedAttrList> &resultAttrs) {
-  if (parseArgumentList(parser, allowVariadic, argTypes, argNames, argAttrs,
-                        isVariadic))
+  bool allowArgAttrs = true;
+  if (parseFunctionArgumentList(parser, allowArgAttrs, allowVariadic, argNames,
+                                argTypes, argAttrs, isVariadic))
     return failure();
   if (succeeded(parser.parseOptionalArrow()))
     return parseFunctionResultList(parser, resultTypes, resultAttrs);
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -45,8 +45,7 @@
 
 func @launch_func_missing_parent_module_attribute(%sz : index) {
   // expected-error@+1 {{expected the closest surrounding module to have the 'gpu.container_module' attribute}}
-  "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {foo = "bar"}
-      : (index, index, index, index, index, index) -> ()
+  gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
   return
 }
 
@@ -54,8 +53,8 @@
 
 module attributes {gpu.container_module} {
   func @launch_func_missing_callee_attribute(%sz : index) {
-    // expected-error@+1 {{symbol reference attribute 'kernel' must be specified}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {foo = "bar"}
+    // expected-error@+1 {{'gpu.launch_func' op requires attribute 'kernel'}}
+    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
         : (index, index, index, index, index, index) -> ()
     return
   }
@@ -65,9 +64,8 @@
 
 module attributes {gpu.container_module} {
   func @launch_func_no_function_attribute(%sz : index) {
-    // expected-error@+1 {{symbol reference attribute 'kernel' must be specified}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {kernel = 10}
-        : (index, index, index, index, index, index) -> ()
+    // expected-error@+1 {{custom op 'gpu.launch_func' invalid kind of attribute specified}}
+    gpu.launch_func "foo" blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
   }
 }
@@ -77,9 +75,7 @@
 module attributes {gpu.container_module} {
   func @launch_func_undefined_module(%sz : index) {
     // expected-error@+1 {{kernel module 'kernels' is undefined}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
-    { kernel = @kernels::@kernel_1 }
-        : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
   }
 }
@@ -103,9 +99,7 @@
 
   func @launch_func_missing_module_attribute(%sz : index) {
     // expected-error@+1 {{kernel module 'kernels' is undefined}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
-    { kernel = @kernels::@kernel_1 }
-        : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
   }
 }
@@ -117,9 +111,7 @@
 
   func @launch_func_undefined_function(%sz : index) {
     // expected-error@+1 {{kernel function '@kernels::@kernel_1' is undefined}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
-    { kernel = @kernels::@kernel_1 }
-        : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
   }
 }
@@ -135,9 +127,7 @@
 
   func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr<float>) {
     // expected-error@+1 {{kernel module 'kernels' is undefined}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
-    {kernel = @kernels::@kernel_1}
-        : (index, index, index, index, index, index, !llvm.ptr<float>) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr<float>)
     return
   }
 }
@@ -153,9 +143,7 @@
 
   func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr<float>) {
     // expected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
-    {kernel = @kernels::@kernel_1}
-        : (index, index, index, index, index, index, !llvm.ptr<float>) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr<float>)
     return
   }
 }
@@ -171,10 +159,7 @@
 
   func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm.ptr<float>) {
     // expected-error@+1 {{got 2 kernel operands but expected 1}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg, %arg)
-        {kernel = @kernels::@kernel_1}
-        : (index, index, index, index, index, index, !llvm.ptr<float>,
-           !llvm.ptr<float>) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr<float>, %arg : !llvm.ptr<float>)
     return
   }
 }
@@ -190,9 +175,17 @@
 
   func @launch_func_kernel_operand_types(%sz : index, %arg : f32) {
     // expected-err@+1 {{type of function argument 0 does not match}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
-        {kernel = @kernels::@kernel_1}
-        : (index, index, index, index, index, index, f32) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : f32)
+    return
+  }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+  func @launch_func_kernel_operand_attr(%sz : index) {
+    // expected-error@+1 {{expected arguments without attributes}}
+    gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%sz : index {foo})
     return
   }
 }
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -63,7 +63,7 @@
       gpu.return
     }
 
-    gpu.func @kernel_2(%arg0: f32, %arg1: memref<?xf32, 1>) kernel {
+    gpu.func @kernel_2() kernel {
       gpu.return
     }
   }
@@ -74,15 +74,11 @@
     // CHECK: %{{.*}} = constant 8
     %cst = constant 8 : index
 
-    // CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = @kernels::@kernel_1} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
-    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1)
-    { kernel = @kernels::@kernel_1}
-        : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
+    // CHECK: gpu.launch_func @kernels::@kernel_1 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) args(%{{.*}} : f32, %{{.*}} : memref<?xf32, 1>)
+    gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) args(%0 : f32, %1 : memref<?xf32, 1>)
 
-    // CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = @kernels::@kernel_2} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
-    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1)
-    { kernel = @kernels::@kernel_2}
-        : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
+    // CHECK: gpu.launch_func @kernels::@kernel_2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}})
+    gpu.launch_func @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)
 
     return
   }
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -21,7 +21,7 @@
   // CHECK: %[[BDIMZ:.*]] = constant 28
   %bDimZ = constant 28 : index
 
-  // CHECK: "gpu.launch_func"(%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]], %[[BDIMX]], %[[BDIMY]], %[[BDIMZ]], %[[ARG0]], %[[ARG1]]) {kernel = @launch_kernel::@launch_kernel} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
+  // CHECK: gpu.launch_func @launch_kernel::@launch_kernel blocks in (%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]]) threads in (%[[BDIMX]], %[[BDIMY]], %[[BDIMZ]]) args(%[[ARG0]] : f32, %[[ARG1]] : memref<?xf32, 1>)
   // CHECK-NOT: gpu.launch blocks
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY,
                                        %grid_z = %gDimZ)
@@ -64,14 +64,14 @@
 func @multiple_launches() {
   // CHECK: %[[CST:.*]] = constant 8 : index
   %cst = constant 8 : index
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = @multiple_launches_kernel::@multiple_launches_kernel} : (index, index, index, index, index, index) -> ()
+  // CHECK: gpu.launch_func @multiple_launches_kernel::@multiple_launches_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]])
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
                                        %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,
                                         %block_z = %cst) {
     gpu.terminator
   }
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = @multiple_launches_kernel_0::@multiple_launches_kernel} : (index, index, index, index, index, index) -> ()
+  // CHECK: gpu.launch_func @multiple_launches_kernel_0::@multiple_launches_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]])
   gpu.launch blocks(%bx2, %by2, %bz2) in (%grid_x2 = %cst, %grid_y2 = %cst,
                                           %grid_z2 = %cst)
              threads(%tx2, %ty2, %tz2) in (%block_x2 = %cst, %block_y2 = %cst,
@@ -95,7 +95,7 @@
   %cst2 = constant 2 : index
   %c0 = constant 0 : index
   %cst3 = "secret_constant"() : () -> index
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %{{.*}}, %{{.*}}) {kernel = @extra_constants_not_inlined_kernel::@extra_constants_not_inlined_kernel} : (index, index, index, index, index, index, memref<?xf32>, index) -> ()
+  // CHECK: gpu.launch_func @extra_constants_not_inlined_kernel::@extra_constants_not_inlined_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args({{.*}} : memref<?xf32>, {{.*}} : index)
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
                                        %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,
@@ -119,7 +119,7 @@
   %cst2 = constant 2 : index
   %c0 = constant 0 : index
   %cst3 = dim %arg0, %c0 : memref<?xf32>
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[ARG0]]) {kernel = @extra_constants_kernel::@extra_constants_kernel} : (index, index, index, index, index, index, memref<?xf32>) -> ()
+  // CHECK: gpu.launch_func @extra_constants_kernel::@extra_constants_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args(%[[ARG0]] : memref<?xf32>)
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
                                        %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,
@@ -130,7 +130,7 @@
   return
 }
 
-// CHECK-LABEL: func @extra_constants_kernel
+// CHECK-LABEL: func @extra_constants_kernel(
 // CHECK-SAME: %[[KARG0:.*]]: memref<?xf32>
 // CHECK: constant 2
 // CHECK: constant 0
@@ -147,7 +147,7 @@
   %c0 = constant 0 : index
   // CHECK: dim %[[ARG1]]
   %cst3 = dim %arg1, %c0 : memref<?xf32>
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[ARG0]], %{{.*}}) {kernel = @extra_constants_noarg_kernel::@extra_constants_noarg_kernel} : (index, index, index, index, index, index, memref<?xf32>, index) -> ()
+  // CHECK: gpu.launch_func @extra_constants_noarg_kernel::@extra_constants_noarg_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args(%[[ARG0]] : memref<?xf32>, {{.*}} : index)
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
                                        %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,
@@ -158,7 +158,7 @@
   return
 }
 
-// CHECK-LABEL: func @extra_constants_noarg_kernel
+// CHECK-LABEL: func @extra_constants_noarg_kernel(
 // CHECK-SAME: %[[KARG0:.*]]: memref<?xf32>, %[[KARG1:.*]]: index
 // CHECK: %[[KCST:.*]] = constant 2
 // CHECK: "use"(%[[KCST]], %[[KARG0]], %[[KARG1]])