diff --git a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h --- a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h +++ b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h @@ -17,13 +17,13 @@ namespace mlir { class SPIRVTypeConverter; + /// Appends to a pattern list additional patterns for translating GPU Ops to -/// SPIR-V ops. Needs the workgroup size as input since SPIR-V/Vulkan requires -/// the workgroup size to be statically specified. +/// SPIR-V ops. For a gpu.func to be converted, it should have a +/// spv.entry_point_abi attribute. void populateGPUToSPIRVPatterns(MLIRContext *context, SPIRVTypeConverter &typeConverter, - OwningRewritePatternList &patterns, - ArrayRef<int64_t> workGroupSize); + OwningRewritePatternList &patterns); } // namespace mlir #endif // MLIR_CONVERSION_GPUTOSPIRV_CONVERTGPUTOSPIRV_H diff --git a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h --- a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h +++ b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h @@ -22,10 +22,9 @@ class ModuleOp; template <typename T> class OpPassBase; -/// Pass to convert GPU Ops to SPIR-V ops. Needs the workgroup size as input -/// since SPIR-V/Vulkan requires the workgroup size to be statically specified. -std::unique_ptr<OpPassBase<ModuleOp>> -createConvertGPUToSPIRVPass(ArrayRef<int64_t> workGroupSize); +/// Pass to convert GPU Ops to SPIR-V ops. For a gpu.func to be converted, it +/// should have a spv.entry_point_abi attribute. 
+std::unique_ptr<OpPassBase<ModuleOp>> createConvertGPUToSPIRVPass(); } // namespace mlir #endif // MLIR_CONVERSION_GPUTOSPIRV_CONVERTGPUTOSPIRVPASS_H diff --git a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h --- a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h +++ b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h @@ -112,6 +112,15 @@ EntryPointABIAttr getEntryPointABIAttr(ArrayRef<int32_t> localSize, MLIRContext *context); +/// Queries the entry point ABI on the nearest function-like op containing the +/// given `op`. Returns null attribute if not found. +EntryPointABIAttr lookupEntryPointABI(Operation *op); + +/// Queries the local workgroup size from entry point ABI on the nearest +/// function-like op containing the given `op`. Returns null attribute if not +/// found. +DenseIntElementsAttr lookupLocalWorkGroupSize(Operation *op); + /// Returns a default resource limits attribute that uses numbers from /// "Table 46. Required Limits" of the Vulkan spec. ResourceLimitsAttr getDefaultResourceLimits(MLIRContext *context); @@ -128,11 +137,6 @@ /// extensions) if not provided. TargetEnvAttr lookupTargetEnvOrDefault(Operation *op); -/// Queries the local workgroup size from entry point ABI on the nearest -/// function-like op containing the given `op`. Returns null attribute if not -/// found. -DenseIntElementsAttr lookupLocalWorkGroupSize(Operation *op); - } // namespace spirv } // namespace mlir diff --git a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp --- a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp @@ -82,16 +82,9 @@ }; /// Pattern to convert a kernel function in GPU dialect within a spv.module. 
-class KernelFnConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> { +class GPUFuncOpConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> { public: - KernelFnConversion(MLIRContext *context, SPIRVTypeConverter &converter, - ArrayRef<int64_t> workGroupSize, - PatternBenefit benefit = 1) - : SPIRVOpLowering<gpu::GPUFuncOp>(context, converter, benefit) { - auto config = workGroupSize.take_front(3); - workGroupSizeAsInt32.assign(config.begin(), config.end()); - workGroupSizeAsInt32.resize(3, 1); - } + using SPIRVOpLowering<gpu::GPUFuncOp>::SPIRVOpLowering; PatternMatchResult matchAndRewrite(gpu::GPUFuncOp funcOp, ArrayRef<Value> operands, @@ -352,13 +345,11 @@ return newFuncOp; } -PatternMatchResult -KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp, - ArrayRef<Value> operands, - ConversionPatternRewriter &rewriter) const { - if (!gpu::GPUDialect::isKernel(funcOp)) { +PatternMatchResult GPUFuncOpConversion::matchAndRewrite( + gpu::GPUFuncOp funcOp, ArrayRef<Value> operands, + ConversionPatternRewriter &rewriter) const { + if (!gpu::GPUDialect::isKernel(funcOp)) return matchFailure(); - } SmallVector<spirv::InterfaceVarABIAttr, 4> argABI; for (auto argNum : llvm::seq<unsigned>(0, funcOp.getNumArguments())) { @@ -366,14 +357,15 @@ 0, argNum, spirv::StorageClass::StorageBuffer, rewriter.getContext())); } - auto context = rewriter.getContext(); - auto entryPointAttr = - spirv::getEntryPointABIAttr(workGroupSizeAsInt32, context); + auto entryPointAttr = spirv::lookupEntryPointABI(funcOp); + if (!entryPointAttr) { + funcOp.emitRemark("match failure: missing 'spv.entry_point_abi' attribute"); + return matchFailure(); + } FuncOp newFuncOp = lowerAsEntryFunction(funcOp, typeConverter, rewriter, entryPointAttr, argABI); - if (!newFuncOp) { + if (!newFuncOp) return matchFailure(); - } newFuncOp.removeAttr(Identifier::get(gpu::GPUDialect::getKernelFuncAttrName(), rewriter.getContext())); return matchSuccess(); @@ -429,13 +421,11 @@ void mlir::populateGPUToSPIRVPatterns(MLIRContext *context, SPIRVTypeConverter &typeConverter, - OwningRewritePatternList &patterns, - ArrayRef<int64_t> workGroupSize) { + 
OwningRewritePatternList &patterns) { populateWithGenerated(context, &patterns); - patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize); patterns.insert< - ForOpConversion, GPUModuleConversion, GPUReturnOpConversion, - IfOpConversion, + ForOpConversion, GPUFuncOpConversion, GPUModuleConversion, + GPUReturnOpConversion, IfOpConversion, LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>, LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>, LaunchConfigConversion<gpu::ThreadIdOp, spirv::BuiltIn::LocalInvocationId>>(context, typeConverter); } diff --git a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp --- a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp @@ -30,27 +30,11 @@ namespace { -class GPUToSPIRVPass : public ModulePass<GPUToSPIRVPass> { -public: - GPUToSPIRVPass() = default; - GPUToSPIRVPass(const GPUToSPIRVPass &) {} - GPUToSPIRVPass(ArrayRef<int64_t> workGroupSize) { - this->workGroupSize = workGroupSize; - } - +struct GPUToSPIRVPass : public ModulePass<GPUToSPIRVPass> { void runOnModule() override; - -private: - /// Command line option to specify the workgroup size. - ListOption<int64_t> workGroupSize{ - *this, "workgroup-size", - llvm::cl::desc( - "Workgroup Sizes in the SPIR-V module for x, followed by y, followed " - "by z dimension of the dispatch (others will be ignored)"), - llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated}; }; } // namespace @@ -70,7 +54,7 @@ SPIRVTypeConverter typeConverter; OwningRewritePatternList patterns; - populateGPUToSPIRVPatterns(context, typeConverter, patterns, workGroupSize); + populateGPUToSPIRVPatterns(context, typeConverter, patterns); populateStandardToSPIRVPatterns(context, typeConverter, patterns); std::unique_ptr<ConversionTarget> target = spirv::SPIRVConversionTarget::get( @@ -84,9 +68,8 @@ } } -std::unique_ptr<OpPassBase<ModuleOp>> -mlir::createConvertGPUToSPIRVPass(ArrayRef<int64_t> workGroupSize) { - return std::make_unique<GPUToSPIRVPass>(workGroupSize); +std::unique_ptr<OpPassBase<ModuleOp>> mlir::createConvertGPUToSPIRVPass() { + return std::make_unique<GPUToSPIRVPass>(); } static PassRegistration<GPUToSPIRVPass> diff --git a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp --- a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp +++ b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp @@ -158,6 +158,26 @@ context); } +spirv::EntryPointABIAttr spirv::lookupEntryPointABI(Operation *op) { + while (op && !op->hasTrait<OpTrait::FunctionLike>()) + op = op->getParentOp(); + if (!op) + return {}; + 
+ if (auto attr = op->getAttrOfType<spirv::EntryPointABIAttr>( + spirv::getEntryPointABIAttrName())) + return attr; + + return {}; +} + +DenseIntElementsAttr spirv::lookupLocalWorkGroupSize(Operation *op) { + if (auto entryPoint = spirv::lookupEntryPointABI(op)) + return entryPoint.local_size(); + + return {}; +} + spirv::ResourceLimitsAttr spirv::getDefaultResourceLimits(MLIRContext *context) { auto i32Type = IntegerType::get(32, context); @@ -187,16 +207,3 @@ return attr; return getDefaultTargetEnv(op->getContext()); } - -DenseIntElementsAttr spirv::lookupLocalWorkGroupSize(Operation *op) { - while (op && !op->hasTrait<OpTrait::FunctionLike>()) - op = op->getParentOp(); - if (!op) - return {}; - - if (auto attr = op->getAttrOfType<spirv::EntryPointABIAttr>( - spirv::getEntryPointABIAttrName())) - return attr.local_size(); - - return {}; -} diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir --- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -pass-pipeline='convert-gpu-to-spirv{workgroup-size=32,4}' %s -o - | FileCheck %s +// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv %s -o - | FileCheck %s module attributes {gpu.container_module} { func @builtin() { @@ -11,7 +11,7 @@ // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId") gpu.module @kernels { gpu.func @builtin_workgroup_id_x() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} @@ -34,7 +34,7 @@ // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId") gpu.module @kernels { gpu.func @builtin_workgroup_id_y() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: 
vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}} @@ -57,7 +57,7 @@ // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId") gpu.module @kernels { gpu.func @builtin_workgroup_id_z() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}} @@ -79,8 +79,11 @@ // CHECK-LABEL: spv.module "Logical" "GLSL450" gpu.module @kernels { gpu.func @builtin_workgroup_size_x() - attributes {gpu.kernel} { - // The constant value is obtained fomr the command line option above. + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 1, 1]>: vector<3xi32>}} { + // The constant value is obtained from the spv.entry_point_abi. + // Note that this ignores the workgroup size specification in gpu.launch. + // We may want to define gpu.workgroup_size and convert it to the entry + // point ABI we want here. // CHECK: spv.constant 32 : i32 %0 = "gpu.block_dim"() {dimension = "x"} : () -> index gpu.return @@ -100,8 +103,8 @@ // CHECK-LABEL: spv.module "Logical" "GLSL450" gpu.module @kernels { gpu.func @builtin_workgroup_size_y() - attributes {gpu.kernel} { - // The constant value is obtained fomr the command line option above. + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} { + // The constant value is obtained from the spv.entry_point_abi. 
// CHECK: spv.constant 4 : i32 %0 = "gpu.block_dim"() {dimension = "y"} : () -> index gpu.return @@ -121,8 +124,8 @@ // CHECK-LABEL: spv.module "Logical" "GLSL450" gpu.module @kernels { gpu.func @builtin_workgroup_size_z() - attributes {gpu.kernel} { - // The constant value is obtained fomr the command line option above (1 is default). + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} { + // The constant value is obtained from the spv.entry_point_abi. // CHECK: spv.constant 1 : i32 %0 = "gpu.block_dim"() {dimension = "z"} : () -> index gpu.return @@ -143,7 +146,7 @@ // CHECK: spv.globalVariable [[LOCALINVOCATIONID:@.*]] built_in("LocalInvocationId") gpu.module @kernels { gpu.func @builtin_local_id_x() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[LOCALINVOCATIONID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} @@ -166,7 +169,7 @@ // CHECK: spv.globalVariable [[NUMWORKGROUPS:@.*]] built_in("NumWorkgroups") gpu.module @kernels { gpu.func @builtin_num_workgroups_x() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMWORKGROUPS]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} diff --git a/mlir/test/Conversion/GPUToSPIRV/if.mlir b/mlir/test/Conversion/GPUToSPIRV/if.mlir --- a/mlir/test/Conversion/GPUToSPIRV/if.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/if.mlir @@ -10,7 +10,7 @@ gpu.module @kernels { // CHECK-LABEL: @kernel_simple_selection gpu.func @kernel_simple_selection(%arg2 : memref<10xf32>, %arg3 : i1) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 
1]>: vector<3xi32>}} { %value = constant 0.0 : f32 %i = constant 0 : index @@ -31,7 +31,7 @@ // CHECK-LABEL: @kernel_nested_selection gpu.func @kernel_nested_selection(%arg3 : memref<10xf32>, %arg4 : memref<10xf32>, %arg5 : i1, %arg6 : i1) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { %i = constant 0 : index %j = constant 9 : index diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir --- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir @@ -29,7 +29,7 @@ // CHECK-SAME: [[ARG5:%.*]]: i32 {spv.interface_var_abi = {binding = 5 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} // CHECK-SAME: [[ARG6:%.*]]: i32 {spv.interface_var_abi = {binding = 6 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} gpu.func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESSWORKGROUPID:%.*]] = spv._address_of [[WORKGROUPIDVAR]] // CHECK: [[WORKGROUPID:%.*]] = spv.Load "Input" [[ADDRESSWORKGROUPID]] // CHECK: [[WORKGROUPIDX:%.*]] = spv.CompositeExtract [[WORKGROUPID]]{{\[}}0 : i32{{\]}} diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir --- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir @@ -9,7 +9,7 @@ gpu.module @kernels { gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[LB:%.*]] = spv.constant 4 : i32 %lb = constant 4 : index // CHECK: [[UB:%.*]] = spv.constant 42 : i32 diff 
--git a/mlir/test/Conversion/GPUToSPIRV/simple.mlir b/mlir/test/Conversion/GPUToSPIRV/simple.mlir --- a/mlir/test/Conversion/GPUToSPIRV/simple.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/simple.mlir @@ -1,25 +1,46 @@ -// RUN: mlir-opt -pass-pipeline='convert-gpu-to-spirv{workgroup-size=32,4}' %s -o - | FileCheck %s +// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv -verify-diagnostics %s -o - | FileCheck %s module attributes {gpu.container_module} { - gpu.module @kernels { // CHECK: spv.module "Logical" "GLSL450" { - // CHECK-LABEL: func @kernel_1 + // CHECK-LABEL: func @basic_module_structure // CHECK-SAME: {{%.*}}: f32 {spv.interface_var_abi = {binding = 0 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} // CHECK-SAME: {{%.*}}: !spv.ptr<!spv.struct<!spv.array<12 x f32 [4]> [0]>, StorageBuffer> {spv.interface_var_abi = {binding = 1 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} // CHECK-SAME: spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>} - gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<12xf32>) attributes {gpu.kernel} { + gpu.func @basic_module_structure(%arg0 : f32, %arg1 : memref<12xf32>) + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} { // CHECK: spv.Return gpu.return } // CHECK: attributes {capabilities = ["Shader"], extensions = ["SPV_KHR_storage_buffer_storage_class"]} } - func @foo() { + func @main() { + %0 = "op"() : () -> (f32) + %1 = "op"() : () -> (memref<12xf32>) + %cst = constant 1 : index + "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "basic_module_structure", kernel_module = @kernels } + : (index, index, index, index, index, index, f32, memref<12xf32>) -> () + return + } +} + +// ----- + +module attributes {gpu.container_module} { + gpu.module @kernels { + // expected-error @below {{failed to legalize operation 'gpu.func'}} + // expected-remark @below {{match failure: missing 'spv.entry_point_abi' attribute}} + gpu.func 
@missing_entry_point_abi(%arg0 : f32, %arg1 : memref<12xf32>) attributes {gpu.kernel} { + gpu.return + } + } + + func @main() { %0 = "op"() : () -> (f32) %1 = "op"() : () -> (memref<12xf32>) %cst = constant 1 : index - "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "kernel_1", kernel_module = @kernels } + "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "missing_entry_point_abi", kernel_module = @kernels } : (index, index, index, index, index, index, f32, memref<12xf32>) -> () return }