diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/TargetAndABI.td b/mlir/include/mlir/Dialect/SPIRV/IR/TargetAndABI.td
--- a/mlir/include/mlir/Dialect/SPIRV/IR/TargetAndABI.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/TargetAndABI.td
@@ -27,7 +27,7 @@
 // points in the generated SPIR-V module:
 // 1) WorkGroup Size.
 def SPV_EntryPointABIAttr : StructAttr<"EntryPointABIAttr", SPIRV_Dialect, [
-    StructFieldAttr<"local_size", I32ElementsAttr>
+    StructFieldAttr<"local_size", OptionalAttr<I32ElementsAttr>>
 ]>;
 
 def SPV_ExtensionArrayAttr : TypedArrayAttrBase<
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -55,7 +55,8 @@
 /// attribute on the surrounding FuncOp is used to replace the gpu::BlockDimOp.
 class WorkGroupSizeConversion : public OpConversionPattern<gpu::BlockDimOp> {
 public:
-  using OpConversionPattern::OpConversionPattern;
+  WorkGroupSizeConversion(TypeConverter &typeConverter, MLIRContext *context)
+      : OpConversionPattern(typeConverter, context, /*benefit*/ 10) {}
 
   LogicalResult
   matchAndRewrite(gpu::BlockDimOp op, OpAdaptor adaptor,
@@ -149,6 +150,9 @@
     gpu::BlockDimOp op, OpAdaptor adaptor,
     ConversionPatternRewriter &rewriter) const {
   auto workGroupSizeAttr = spirv::lookupLocalWorkGroupSize(op);
+  if (!workGroupSizeAttr)
+    return failure();
+
   auto val = workGroupSizeAttr
                  .getValues<int32_t>()[static_cast<int32_t>(op.dimension())];
   auto convertedType =
@@ -337,6 +341,7 @@
       GPUReturnOpConversion,
       LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
       LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
+      LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
       LaunchConfigConversion<gpu::ThreadIdOp, spirv::BuiltIn::LocalInvocationId>,
       SingleDimLaunchConfigConversion<gpu::SubgroupIdOp,
                                       spirv::BuiltIn::SubgroupId>,
diff --git a/mlir/lib/Dialect/SPIRV/IR/TargetAndABI.cpp b/mlir/lib/Dialect/SPIRV/IR/TargetAndABI.cpp
--- a/mlir/lib/Dialect/SPIRV/IR/TargetAndABI.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/TargetAndABI.cpp
 spirv::EntryPointABIAttr spirv::getEntryPointABIAttr(
     ArrayRef<int32_t> localSize, MLIRContext *context) {
+  if (localSize.empty())
+    return spirv::EntryPointABIAttr::get(nullptr, context);
+
   assert(localSize.size() == 3);
   return spirv::EntryPointABIAttr::get(
       DenseElementsAttr::get(
diff --git a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp
--- a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp
+++ b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp
@@ -136,10 +136,13 @@
   // Specifies the spv.ExecutionModeOp.
   auto localSizeAttr = entryPointAttr.local_size();
-  SmallVector<int32_t, 3> localSize(localSizeAttr.getValues<int32_t>());
-  builder.create<spirv::ExecutionModeOp>(
-      funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
-  funcOp->removeAttr(entryPointAttrName);
+  if (localSizeAttr) {
+    auto values = localSizeAttr.getValues<int32_t>();
+    SmallVector<int32_t, 3> localSize(values);
+    builder.create<spirv::ExecutionModeOp>(
+        funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
+    funcOp->removeAttr(entryPointAttrName);
+  }
   return success();
 }
diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
--- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
@@ -223,6 +223,78 @@
 
 // -----
 
+module attributes {gpu.container_module} {
+  func @builtin() {
+    %c0 = arith.constant 1 : index
+    gpu.launch_func @kernels::@builtin_workgroup_size_x
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
+    return
+  }
+
+  // CHECK-LABEL: spv.module @{{.*}}
+  // CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
+  gpu.module @kernels {
+    gpu.func @builtin_workgroup_size_x() kernel
+      attributes {spv.entry_point_abi = {}} {
+      // CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
+      // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
+      // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
+      %0 = gpu.block_dim x
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+  func @builtin() {
+    %c0 = arith.constant 1 : index
+    gpu.launch_func @kernels::@builtin_workgroup_size_y
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
+    return
+  }
+
+  // CHECK-LABEL: spv.module @{{.*}}
+  // CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
+  gpu.module @kernels {
+    gpu.func @builtin_workgroup_size_y() kernel
+      attributes {spv.entry_point_abi = {}} {
+      // CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
+      // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
+      // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
+      %0 = gpu.block_dim y
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+  func @builtin() {
+    %c0 = arith.constant 1 : index
+    gpu.launch_func @kernels::@builtin_workgroup_size_z
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
+    return
+  }
+
+  // CHECK-LABEL: spv.module @{{.*}}
+  // CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
+  gpu.module @kernels {
+    gpu.func @builtin_workgroup_size_z() kernel
+      attributes {spv.entry_point_abi = {}} {
+      // CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
+      // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
+      // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
+      %0 = gpu.block_dim z
+      gpu.return
+    }
+  }
+}
+
+// -----
+
 module attributes {gpu.container_module} {
   // CHECK-LABEL: spv.module @{{.*}} Logical GLSL450
   // CHECK: spv.GlobalVariable [[SUBGROUPSIZE:@.*]] built_in("SubgroupSize")
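
For reference, a minimal usage sketch (not part of the patch; the kernel names and the 32x4x1 size are made up for illustration, and the local_size syntax follows the existing GPUToSPIRV tests): a kernel can still give an explicit local_size, which the ABI lowering turns into a "LocalSize" execution mode, or leave spv.entry_point_abi empty, in which case gpu.block_dim now lowers to a read of the WorkgroupSize builtin as in the tests above.

  gpu.module @kernels {
    // Explicit local size: LowerABIAttributesPass emits the "LocalSize"
    // execution mode for this entry point.
    gpu.func @fixed_size_kernel() kernel
      attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
      gpu.return
    }

    // No local size: gpu.block_dim is converted through the
    // LaunchConfigConversion pattern added above and loads the
    // WorkgroupSize builtin instead.
    gpu.func @deferred_size_kernel() kernel
      attributes {spv.entry_point_abi = {}} {
      %0 = gpu.block_dim x
      gpu.return
    }
  }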