Index: mlir/include/mlir/Dialect/GPU/GPUOps.td =================================================================== --- mlir/include/mlir/Dialect/GPU/GPUOps.td +++ mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -86,6 +86,55 @@ }]; } +def GPU_SubgroupIdOp : GPU_Op<"subgroup_id", [NoSideEffect]>, + Arguments<(ins)>, Results<(outs Index:$result)> { + let description = [{ + Returns the subgroup id, i.e. the index of the current subgroup within the + block. + + Example: + + ```mlir + %sgId = gpu.subgroup_id : index + ``` + }]; + + let assemblyFormat = "attr-dict `:` type($result)"; + let verifier = [{ return success(); }]; +} + +def GPU_NumSubgroupsOp : GPU_Op<"num_subgroups", [NoSideEffect]>, + Arguments<(ins)>, Results<(outs Index:$result)> { + let description = [{ + Returns the number of subgroups within a block. + + Example: + + ```mlir + %numSg = gpu.num_subgroups : index + ``` + }]; + + let assemblyFormat = "attr-dict `:` type($result)"; + let verifier = [{ return success(); }]; +} + +def GPU_SubgroupSizeOp : GPU_Op<"subgroup_size", [NoSideEffect]>, + Arguments<(ins)>, Results<(outs Index:$result)> { + let description = [{ + Returns the number of threads within a subgroup. + + Example: + + ```mlir + %sgSz = gpu.subgroup_size : index + ``` + }]; + + let assemblyFormat = "attr-dict `:` type($result)"; + let verifier = [{ return success(); }]; +} + def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">, AutomaticAllocationScope, FunctionLike, IsolatedFromAbove, Symbol]> { Index: mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp =================================================================== --- mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp +++ mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp @@ -67,6 +67,18 @@ ConversionPatternRewriter &rewriter) const override; }; +/// Pattern lowering subgroup size/id to loading SPIR-V invocation +/// builtin variables. 
+template +class LaunchConfigConversionSingleDim : public SPIRVOpLowering { +public: + using SPIRVOpLowering::SPIRVOpLowering; + + LogicalResult + matchAndRewrite(SourceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; + /// This is separate because in Vulkan workgroup size is exposed to shaders via /// a constant with WorkgroupSize decoration. So here we cannot generate a /// builtin variable; instead the information in the `spv.entry_point_abi` @@ -276,6 +288,16 @@ return success(); } +template +LogicalResult +LaunchConfigConversionSingleDim::matchAndRewrite( + SourceOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + auto spirvBuiltin = spirv::getBuiltinVariableValue(op, builtin, rewriter); + rewriter.replaceOp(op, spirvBuiltin); + return success(); +} + LogicalResult WorkGroupSizeConversion::matchAndRewrite( gpu::BlockDimOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const { @@ -457,5 +479,11 @@ LaunchConfigConversion, LaunchConfigConversion, + LaunchConfigConversionSingleDim, + LaunchConfigConversionSingleDim, + LaunchConfigConversionSingleDim, TerminatorOpConversion, WorkGroupSizeConversion>(context, typeConverter); } Index: mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp =================================================================== --- mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp +++ mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp @@ -551,6 +551,16 @@ builder.create(loc, ptrType, name, builtin); break; } + case spirv::BuiltIn::SubgroupId: + case spirv::BuiltIn::NumSubgroups: + case spirv::BuiltIn::SubgroupSize: { + auto ptrType = spirv::PointerType::get(builder.getIntegerType(32), + spirv::StorageClass::Input); + std::string name = getBuiltinVarName(builtin); + newVarOp = + builder.create(loc, ptrType, name, builtin); + break; + } default: emitError(loc, "unimplemented builtin variable generation for ") << stringifyBuiltIn(builtin); Index: mlir/test/Conversion/GPUToSPIRV/builtins.mlir 
=================================================================== --- mlir/test/Conversion/GPUToSPIRV/builtins.mlir +++ mlir/test/Conversion/GPUToSPIRV/builtins.mlir @@ -178,3 +178,69 @@ } } } + +// ----- + +module attributes {gpu.container_module} { + func @builtin() { + %c0 = constant 1 : index + "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_subgroup_id} : (index, index, index, index, index, index) -> () + return + } + + // CHECK-LABEL: spv.module Logical GLSL450 + // CHECK: spv.globalVariable [[SUBGROUPID:@.*]] built_in("SubgroupId") + gpu.module @kernels { + gpu.func @builtin_subgroup_id() kernel + attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { + // CHECK: [[ADDRESS:%.*]] = spv._address_of [[SUBGROUPID]] + // CHECK-NEXT: {{%.*}} = spv.Load "Input" [[ADDRESS]] + %0 = gpu.subgroup_id : index + gpu.return + } + } +} + +// ----- + +module attributes {gpu.container_module} { + func @builtin() { + %c0 = constant 1 : index + "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_num_subgroups} : (index, index, index, index, index, index) -> () + return + } + + // CHECK-LABEL: spv.module Logical GLSL450 + // CHECK: spv.globalVariable [[NUMSUBGROUPS:@.*]] built_in("NumSubgroups") + gpu.module @kernels { + gpu.func @builtin_num_subgroups() kernel + attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { + // CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMSUBGROUPS]] + // CHECK-NEXT: {{%.*}} = spv.Load "Input" [[ADDRESS]] + %0 = gpu.num_subgroups : index + gpu.return + } + } +} + +// ----- + +module attributes {gpu.container_module} { + func @builtin() { + %c0 = constant 1 : index + "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_subgroup_size} : (index, index, index, index, index, index) -> () + return + } + + // CHECK-LABEL: spv.module Logical GLSL450 + // CHECK: spv.globalVariable [[SUBGROUPSIZE:@.*]] 
built_in("SubgroupSize") + gpu.module @kernels { + gpu.func @builtin_subgroup_size() kernel + attributes {spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { + // CHECK: [[ADDRESS:%.*]] = spv._address_of [[SUBGROUPSIZE]] + // CHECK-NEXT: {{%.*}} = spv.Load "Input" [[ADDRESS]] + %0 = gpu.subgroup_size : index + gpu.return + } + } +} Index: mlir/test/Dialect/GPU/ops.mlir =================================================================== --- mlir/test/Dialect/GPU/ops.mlir +++ mlir/test/Dialect/GPU/ops.mlir @@ -44,6 +44,10 @@ %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index) %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index) + %sgId = gpu.subgroup_id : index + %numSg = gpu.num_subgroups : index + %SgSi = gpu.subgroup_size : index + %one = constant 1.0 : f32 %sum = "gpu.all_reduce"(%one) ({}) {op = "add"} : (f32) -> (f32)