diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -86,6 +86,55 @@ }]; } +def GPU_SubgroupIdOp : GPU_Op<"subgroup_id", [NoSideEffect]>, + Arguments<(ins)>, Results<(outs Index:$result)> { + let description = [{ + Returns the subgroup id, i.e. the index of the current subgroup within the + workgroup. + + Example: + + ```mlir + %sgId = gpu.subgroup_id : index + ``` + }]; + + let assemblyFormat = "attr-dict `:` type($result)"; + let verifier = [{ return success(); }]; +} + +def GPU_NumSubgroupsOp : GPU_Op<"num_subgroups", [NoSideEffect]>, + Arguments<(ins)>, Results<(outs Index:$result)> { + let description = [{ + Returns the number of subgroups within a workgroup. + + Example: + + ```mlir + %numSg = gpu.num_subgroups : index + ``` + }]; + + let assemblyFormat = "attr-dict `:` type($result)"; + let verifier = [{ return success(); }]; +} + +def GPU_SubgroupSizeOp : GPU_Op<"subgroup_size", [NoSideEffect]>, + Arguments<(ins)>, Results<(outs Index:$result)> { + let description = [{ + Returns the number of threads within a subgroup. + + Example: + + ```mlir + %sgSz = gpu.subgroup_size : index + ``` + }]; + + let assemblyFormat = "attr-dict `:` type($result)"; + let verifier = [{ return success(); }]; +} + def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">, AutomaticAllocationScope, FunctionLike, IsolatedFromAbove, Symbol]> { diff --git a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp --- a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp @@ -67,6 +67,18 @@ ConversionPatternRewriter &rewriter) const override; }; +/// Pattern lowering subgroup size/id to loading SPIR-V invocation +/// builtin variables. 
+template +class SingleDimLaunchConfigConversion : public SPIRVOpLowering { +public: + using SPIRVOpLowering::SPIRVOpLowering; + + LogicalResult + matchAndRewrite(SourceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; + /// This is separate because in Vulkan workgroup size is exposed to shaders via /// a constant with WorkgroupSize decoration. So here we cannot generate a /// builtin variable; instead the information in the `spv.entry_point_abi` @@ -276,6 +288,16 @@ return success(); } +template +LogicalResult +SingleDimLaunchConfigConversion::matchAndRewrite( + SourceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + auto spirvBuiltin = spirv::getBuiltinVariableValue(op, builtin, rewriter); + rewriter.replaceOp(op, spirvBuiltin); + return success(); +} + LogicalResult WorkGroupSizeConversion::matchAndRewrite( gpu::BlockDimOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const { @@ -457,5 +479,11 @@ LaunchConfigConversion, LaunchConfigConversion, + SingleDimLaunchConfigConversion, + SingleDimLaunchConfigConversion, + SingleDimLaunchConfigConversion, TerminatorOpConversion, WorkGroupSizeConversion>(context, typeConverter); } diff --git a/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp b/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp --- a/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp @@ -551,6 +551,16 @@ builder.create(loc, ptrType, name, builtin); break; } + case spirv::BuiltIn::SubgroupId: + case spirv::BuiltIn::NumSubgroups: + case spirv::BuiltIn::SubgroupSize: { + auto ptrType = spirv::PointerType::get(builder.getIntegerType(32), + spirv::StorageClass::Input); + std::string name = getBuiltinVarName(builtin); + newVarOp = + builder.create(loc, ptrType, name, builtin); + break; + } default: emitError(loc, "unimplemented builtin variable generation for ") << stringifyBuiltIn(builtin); diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir 
b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir --- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir @@ -178,3 +178,51 @@ } } } + +// ----- + +module attributes {gpu.container_module} { + // CHECK-LABEL: spv.module Logical GLSL450 + // CHECK: spv.globalVariable [[SUBGROUPID:@.*]] built_in("SubgroupId") + gpu.module @kernels { + gpu.func @builtin_subgroup_id() kernel + attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { + // CHECK: [[ADDRESS:%.*]] = spv._address_of [[SUBGROUPID]] + // CHECK-NEXT: {{%.*}} = spv.Load "Input" [[ADDRESS]] + %0 = gpu.subgroup_id : index + gpu.return + } + } +} + +// ----- + +module attributes {gpu.container_module} { + // CHECK-LABEL: spv.module Logical GLSL450 + // CHECK: spv.globalVariable [[NUMSUBGROUPS:@.*]] built_in("NumSubgroups") + gpu.module @kernels { + gpu.func @builtin_num_subgroups() kernel + attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { + // CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMSUBGROUPS]] + // CHECK-NEXT: {{%.*}} = spv.Load "Input" [[ADDRESS]] + %0 = gpu.num_subgroups : index + gpu.return + } + } +} + +// ----- + +module attributes {gpu.container_module} { + // CHECK-LABEL: spv.module Logical GLSL450 + // CHECK: spv.globalVariable [[SUBGROUPSIZE:@.*]] built_in("SubgroupSize") + gpu.module @kernels { + gpu.func @builtin_subgroup_size() kernel + attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { + // CHECK: [[ADDRESS:%.*]] = spv._address_of [[SUBGROUPSIZE]] + // CHECK-NEXT: {{%.*}} = spv.Load "Input" [[ADDRESS]] + %0 = gpu.subgroup_size : index + gpu.return + } + } +} diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -44,6 +44,10 @@ %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index) %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index) + %sgId = 
gpu.subgroup_id : index + %numSg = gpu.num_subgroups : index + %SgSi = gpu.subgroup_size : index + %one = constant 1.0 : f32 %sum = "gpu.all_reduce"(%one) ({}) {op = "add"} : (f32) -> (f32)