diff --git a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h --- a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h +++ b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h @@ -17,13 +17,13 @@ namespace mlir { class SPIRVTypeConverter; + /// Appends to a pattern list additional patterns for translating GPU Ops to -/// SPIR-V ops. Needs the workgroup size as input since SPIR-V/Vulkan requires -/// the workgroup size to be statically specified. +/// SPIR-V ops. For a gpu.func to be converted, it should have a +/// spv.entry_point_abi attribute. void populateGPUToSPIRVPatterns(MLIRContext *context, SPIRVTypeConverter &typeConverter, - OwningRewritePatternList &patterns, - ArrayRef<int64_t> workGroupSize); + OwningRewritePatternList &patterns); } // namespace mlir #endif // MLIR_CONVERSION_GPUTOSPIRV_CONVERTGPUTOSPIRV_H diff --git a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h --- a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h +++ b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h @@ -22,10 +22,9 @@ class ModuleOp; template <typename T> class OpPassBase; -/// Pass to convert GPU Ops to SPIR-V ops. Needs the workgroup size as input -/// since SPIR-V/Vulkan requires the workgroup size to be statically specified. -std::unique_ptr<OpPassBase<ModuleOp>> -createConvertGPUToSPIRVPass(ArrayRef<int64_t> workGroupSize); +/// Pass to convert GPU Ops to SPIR-V ops. For a gpu.func to be converted, it +/// should have a spv.entry_point_abi attribute. 
+std::unique_ptr<OpPassBase<ModuleOp>> createConvertGPUToSPIRVPass(); } // namespace mlir #endif // MLIR_CONVERSION_GPUTOSPIRV_CONVERTGPUTOSPIRVPASS_H diff --git a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h --- a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h +++ b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h @@ -112,6 +112,15 @@ EntryPointABIAttr getEntryPointABIAttr(ArrayRef<int32_t> localSize, MLIRContext *context); +/// Queries the entry point ABI on the nearest function-like op containing the +/// given `op`. Returns null attribute if not found. +EntryPointABIAttr lookupEntryPointABI(Operation *op); + +/// Queries the local workgroup size from entry point ABI on the nearest +/// function-like op containing the given `op`. Returns null attribute if not +/// found. +DenseIntElementsAttr lookupLocalWorkGroupSize(Operation *op); + /// Returns a default resource limits attribute that uses numbers from /// "Table 46. Required Limits" of the Vulkan spec. ResourceLimitsAttr getDefaultResourceLimits(MLIRContext *context); @@ -128,11 +137,6 @@ /// extensions) if not provided. TargetEnvAttr lookupTargetEnvOrDefault(Operation *op); -/// Queries the local workgroup size from entry point ABI on the nearest -/// function-like op containing the given `op`. Returns null attribute if not -/// found. -DenseIntElementsAttr lookupLocalWorkGroupSize(Operation *op); - } // namespace spirv } // namespace mlir diff --git a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp --- a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp @@ -82,16 +82,9 @@ }; /// Pattern to convert a kernel function in GPU dialect within a spv.module. 
-class KernelFnConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> { +class GPUFuncOpConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> { public: - KernelFnConversion(MLIRContext *context, SPIRVTypeConverter &converter, - ArrayRef<int64_t> workGroupSize, - PatternBenefit benefit = 1) - : SPIRVOpLowering<gpu::GPUFuncOp>(context, converter, benefit) { - auto config = workGroupSize.take_front(3); - workGroupSizeAsInt32.assign(config.begin(), config.end()); - workGroupSizeAsInt32.resize(3, 1); - } + using SPIRVOpLowering<gpu::GPUFuncOp>::SPIRVOpLowering; PatternMatchResult matchAndRewrite(gpu::GPUFuncOp funcOp, ArrayRef<Value> operands, @@ -352,13 +345,11 @@ return newFuncOp; } -PatternMatchResult -KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp, - ArrayRef<Value> operands, - ConversionPatternRewriter &rewriter) const { - if (!gpu::GPUDialect::isKernel(funcOp)) { +PatternMatchResult GPUFuncOpConversion::matchAndRewrite( + gpu::GPUFuncOp funcOp, ArrayRef<Value> operands, + ConversionPatternRewriter &rewriter) const { + if (!gpu::GPUDialect::isKernel(funcOp)) return matchFailure(); - } SmallVector<spirv::InterfaceVarABIAttr, 4> argABI; for (auto argNum : llvm::seq<unsigned>(0, funcOp.getNumArguments())) { @@ -366,14 +357,15 @@ 0, argNum, spirv::StorageClass::StorageBuffer, rewriter.getContext())); } - auto context = rewriter.getContext(); - auto entryPointAttr = - spirv::getEntryPointABIAttr(workGroupSizeAsInt32, context); + auto entryPointAttr = spirv::lookupEntryPointABI(funcOp); + if (!entryPointAttr) { + funcOp.emitRemark("match failure: missing 'spv.entry_point_abi' attribute"); + return matchFailure(); + } FuncOp newFuncOp = lowerAsEntryFunction(funcOp, typeConverter, rewriter, entryPointAttr, argABI); - if (!newFuncOp) { + if (!newFuncOp) return matchFailure(); - } newFuncOp.removeAttr(Identifier::get(gpu::GPUDialect::getKernelFuncAttrName(), rewriter.getContext())); return matchSuccess(); @@ -429,13 +421,11 @@ void mlir::populateGPUToSPIRVPatterns(MLIRContext *context, SPIRVTypeConverter &typeConverter, - OwningRewritePatternList &patterns, - ArrayRef<int64_t> workGroupSize) { + 
OwningRewritePatternList &patterns) { populateWithGenerated(context, &patterns); - patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize); patterns.insert< - ForOpConversion, GPUModuleConversion, GPUReturnOpConversion, - IfOpConversion, + ForOpConversion, GPUFuncOpConversion, GPUModuleConversion, + GPUReturnOpConversion, IfOpConversion, LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>, LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>, LaunchConfigConversion<gpu::ThreadIdOp, spirv::BuiltIn::LocalInvocationId>>(context, typeConverter); } diff --git a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp --- a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp @@ -30,27 +30,11 @@ namespace { -class GPUToSPIRVPass : public ModulePass<GPUToSPIRVPass> { -public: - GPUToSPIRVPass() = default; - GPUToSPIRVPass(const GPUToSPIRVPass &) {} - GPUToSPIRVPass(ArrayRef<int64_t> workGroupSize) { - this->workGroupSize = workGroupSize; - } - +struct GPUToSPIRVPass : public ModulePass<GPUToSPIRVPass> { void runOnModule() override; - -private: - /// Command line option to specify the workgroup size. - ListOption<int64_t> workGroupSize{ - *this, "workgroup-size", - llvm::cl::desc( - "Workgroup Sizes in the SPIR-V module for x, followed by y, followed " - "by z dimension of the dispatch (others will be ignored)"), - llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated}; }; } // namespace @@ -70,7 +54,7 @@ SPIRVTypeConverter typeConverter; OwningRewritePatternList patterns; - populateGPUToSPIRVPatterns(context, typeConverter, patterns, workGroupSize); + populateGPUToSPIRVPatterns(context, typeConverter, patterns); populateStandardToSPIRVPatterns(context, typeConverter, patterns); std::unique_ptr<ConversionTarget> target = spirv::SPIRVConversionTarget::get( @@ -84,9 +68,8 @@ } } -std::unique_ptr<OpPassBase<ModuleOp>> -mlir::createConvertGPUToSPIRVPass(ArrayRef<int64_t> workGroupSize) { - return std::make_unique<GPUToSPIRVPass>(workGroupSize); +std::unique_ptr<OpPassBase<ModuleOp>> mlir::createConvertGPUToSPIRVPass() { + return std::make_unique<GPUToSPIRVPass>(); } static PassRegistration<GPUToSPIRVPass> diff --git a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp --- a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp +++ b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp @@ -158,6 +158,26 @@ context); } +spirv::EntryPointABIAttr spirv::lookupEntryPointABI(Operation *op) { + while (op && !op->hasTrait<OpTrait::FunctionLike>()) + op = op->getParentOp(); + if (!op) + return {}; + 
+ if (auto attr = op->getAttrOfType<spirv::EntryPointABIAttr>( + spirv::getEntryPointABIAttrName())) + return attr; + + return {}; +} + +DenseIntElementsAttr spirv::lookupLocalWorkGroupSize(Operation *op) { + if (auto entryPoint = spirv::lookupEntryPointABI(op)) + return entryPoint.local_size(); + + return {}; +} + spirv::ResourceLimitsAttr spirv::getDefaultResourceLimits(MLIRContext *context) { auto i32Type = IntegerType::get(32, context); @@ -187,16 +207,3 @@ return attr; return getDefaultTargetEnv(op->getContext()); } - -DenseIntElementsAttr spirv::lookupLocalWorkGroupSize(Operation *op) { - while (op && !op->hasTrait<OpTrait::FunctionLike>()) - op = op->getParentOp(); - if (!op) - return {}; - - if (auto attr = op->getAttrOfType<spirv::EntryPointABIAttr>( - spirv::getEntryPointABIAttrName())) - return attr.local_size(); - - return {}; -} diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir --- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -pass-pipeline='convert-gpu-to-spirv{workgroup-size=32,4}' %s -o - | FileCheck %s +// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv %s -o - | FileCheck %s module attributes {gpu.container_module} { func @builtin() { @@ -11,7 +11,7 @@ // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId") gpu.module @kernels { gpu.func @builtin_workgroup_id_x() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} @@ -34,7 +34,7 @@ // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId") gpu.module @kernels { gpu.func @builtin_workgroup_id_y() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: 
vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}} @@ -57,7 +57,7 @@ // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId") gpu.module @kernels { gpu.func @builtin_workgroup_id_z() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}} @@ -79,8 +79,11 @@ // CHECK-LABEL: spv.module "Logical" "GLSL450" gpu.module @kernels { gpu.func @builtin_workgroup_size_x() - attributes {gpu.kernel} { - // The constant value is obtained fomr the command line option above. + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 1, 1]>: vector<3xi32>}} { + // The constant value is obtained from the spv.entry_point_abi. + // Note that this ignores the workgroup size specification in gpu.launch. + // We may want to define gpu.workgroup_size and convert it to the entry + // point ABI we want here. // CHECK: spv.constant 32 : i32 %0 = "gpu.block_dim"() {dimension = "x"} : () -> index gpu.return @@ -100,8 +103,8 @@ // CHECK-LABEL: spv.module "Logical" "GLSL450" gpu.module @kernels { gpu.func @builtin_workgroup_size_y() - attributes {gpu.kernel} { - // The constant value is obtained fomr the command line option above. + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} { + // The constant value is obtained from the spv.entry_point_abi. 
// CHECK: spv.constant 4 : i32 %0 = "gpu.block_dim"() {dimension = "y"} : () -> index gpu.return @@ -121,8 +124,8 @@ // CHECK-LABEL: spv.module "Logical" "GLSL450" gpu.module @kernels { gpu.func @builtin_workgroup_size_z() - attributes {gpu.kernel} { - // The constant value is obtained fomr the command line option above (1 is default). + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} { + // The constant value is obtained from the spv.entry_point_abi. // CHECK: spv.constant 1 : i32 %0 = "gpu.block_dim"() {dimension = "z"} : () -> index gpu.return @@ -143,7 +146,7 @@ // CHECK: spv.globalVariable [[LOCALINVOCATIONID:@.*]] built_in("LocalInvocationId") gpu.module @kernels { gpu.func @builtin_local_id_x() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[LOCALINVOCATIONID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} @@ -166,7 +169,7 @@ // CHECK: spv.globalVariable [[NUMWORKGROUPS:@.*]] built_in("NumWorkgroups") gpu.module @kernels { gpu.func @builtin_num_workgroups_x() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMWORKGROUPS]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} diff --git a/mlir/test/Conversion/GPUToSPIRV/if.mlir b/mlir/test/Conversion/GPUToSPIRV/if.mlir --- a/mlir/test/Conversion/GPUToSPIRV/if.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/if.mlir @@ -10,7 +10,7 @@ gpu.module @kernels { // CHECK-LABEL: @kernel_simple_selection gpu.func @kernel_simple_selection(%arg2 : memref<10xf32>, %arg3 : i1) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 
1]>: vector<3xi32>}} { %value = constant 0.0 : f32 %i = constant 0 : index @@ -31,7 +31,7 @@ // CHECK-LABEL: @kernel_nested_selection gpu.func @kernel_nested_selection(%arg3 : memref<10xf32>, %arg4 : memref<10xf32>, %arg5 : i1, %arg6 : i1) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { %i = constant 0 : index %j = constant 9 : index diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir --- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir @@ -29,7 +29,7 @@ // CHECK-SAME: [[ARG5:%.*]]: i32 {spv.interface_var_abi = {binding = 5 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} // CHECK-SAME: [[ARG6:%.*]]: i32 {spv.interface_var_abi = {binding = 6 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} gpu.func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESSWORKGROUPID:%.*]] = spv._address_of [[WORKGROUPIDVAR]] // CHECK: [[WORKGROUPID:%.*]] = spv.Load "Input" [[ADDRESSWORKGROUPID]] // CHECK: [[WORKGROUPIDX:%.*]] = spv.CompositeExtract [[WORKGROUPID]]{{\[}}0 : i32{{\]}} diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir --- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir @@ -9,7 +9,7 @@ gpu.module @kernels { gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[LB:%.*]] = spv.constant 4 : i32 %lb = constant 4 : index // CHECK: [[UB:%.*]] = spv.constant 42 : i32 diff 
--git a/mlir/test/Conversion/GPUToSPIRV/simple.mlir b/mlir/test/Conversion/GPUToSPIRV/simple.mlir --- a/mlir/test/Conversion/GPUToSPIRV/simple.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/simple.mlir @@ -1,25 +1,46 @@ -// RUN: mlir-opt -pass-pipeline='convert-gpu-to-spirv{workgroup-size=32,4}' %s -o - | FileCheck %s +// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv -verify-diagnostics %s -o - | FileCheck %s module attributes {gpu.container_module} { - gpu.module @kernels { // CHECK: spv.module "Logical" "GLSL450" { - // CHECK-LABEL: func @kernel_1 + // CHECK-LABEL: func @basic_module_structure // CHECK-SAME: {{%.*}}: f32 {spv.interface_var_abi = {binding = 0 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} // CHECK-SAME: {{%.*}}: !spv.ptr<!spv.struct<!spv.array<12 x f32 [4]> [0]>, StorageBuffer> {spv.interface_var_abi = {binding = 1 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} // CHECK-SAME: spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>} - gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<12xf32>) attributes {gpu.kernel} { + gpu.func @basic_module_structure(%arg0 : f32, %arg1 : memref<12xf32>) + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} { // CHECK: spv.Return gpu.return } // CHECK: attributes {capabilities = ["Shader"], extensions = ["SPV_KHR_storage_buffer_storage_class"]} } - func @foo() { + func @main() { + %0 = "op"() : () -> (f32) + %1 = "op"() : () -> (memref<12xf32>) + %cst = constant 1 : index + "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "basic_module_structure", kernel_module = @kernels } + : (index, index, index, index, index, index, f32, memref<12xf32>) -> () + return + } +} + +// ----- + +module attributes {gpu.container_module} { + gpu.module @kernels { + // expected-error @below {{failed to legalize operation 'gpu.func'}} + // expected-remark @below {{match failure: missing 'spv.entry_point_abi' attribute}} + gpu.func 
@missing_entry_point_abi(%arg0 : f32, %arg1 : memref<12xf32>) attributes {gpu.kernel} { + gpu.return + } + } + + func @main() { %0 = "op"() : () -> (f32) %1 = "op"() : () -> (memref<12xf32>) %cst = constant 1 : index - "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "kernel_1", kernel_module = @kernels } + "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "missing_entry_point_abi", kernel_module = @kernels } : (index, index, index, index, index, index, f32, memref<12xf32>) -> () return }