diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h @@ -22,6 +22,7 @@ #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/SymbolTable.h" +#include "mlir/Interfaces/InferIntRangeInterface.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -20,6 +20,7 @@ include "mlir/IR/FunctionInterfaces.td" include "mlir/IR/SymbolInterfaces.td" include "mlir/Interfaces/DataLayoutInterfaces.td" +include "mlir/Interfaces/InferIntRangeInterface.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" @@ -43,7 +44,8 @@ def GPU_DimensionAttr : EnumAttr; class GPU_IndexOp traits = []> : - GPU_Op, + GPU_Op])>, Arguments<(ins GPU_DimensionAttr:$dimension)>, Results<(outs Index)> { let assemblyFormat = "$dimension attr-dict"; } @@ -97,7 +99,8 @@ }]; } -def GPU_LaneIdOp : GPU_Op<"lane_id", [NoSideEffect]> { +def GPU_LaneIdOp : GPU_Op<"lane_id", [ + NoSideEffect, DeclareOpInterfaceMethods]> { let description = [{ Returns the lane id within the subgroup (warp/wave). @@ -110,7 +113,8 @@ let assemblyFormat = "attr-dict"; } -def GPU_SubgroupIdOp : GPU_Op<"subgroup_id", [NoSideEffect]>, +def GPU_SubgroupIdOp : GPU_Op<"subgroup_id", [ + NoSideEffect, DeclareOpInterfaceMethods]>, Arguments<(ins)>, Results<(outs Index:$result)> { let description = [{ Returns the subgroup id, i.e. 
the index of the current subgroup within the @@ -141,7 +145,8 @@ } -def GPU_NumSubgroupsOp : GPU_Op<"num_subgroups", [NoSideEffect]>, +def GPU_NumSubgroupsOp : GPU_Op<"num_subgroups", [ + NoSideEffect, DeclareOpInterfaceMethods]>, Arguments<(ins)>, Results<(outs Index:$result)> { let description = [{ Returns the number of subgroups within a workgroup. @@ -156,7 +161,8 @@ let assemblyFormat = "attr-dict `:` type($result)"; } -def GPU_SubgroupSizeOp : GPU_Op<"subgroup_size", [NoSideEffect]>, +def GPU_SubgroupSizeOp : GPU_Op<"subgroup_size", [ + NoSideEffect, DeclareOpInterfaceMethods]>, Arguments<(ins)>, Results<(outs Index:$result)> { let description = [{ Returns the number of threads within a subgroup. @@ -465,8 +471,9 @@ let hasVerifier = 1; } -def GPU_LaunchOp : GPU_Op<"launch", - [AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface]>, +def GPU_LaunchOp : GPU_Op<"launch", [ + AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface, + DeclareOpInterfaceMethods]>, Arguments<(ins Variadic:$asyncDependencies, Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ, Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ, diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -21,6 +21,7 @@ add_mlir_dialect_library(MLIRGPUOps IR/GPUDialect.cpp + IR/InferIntRangeInterfaceImpls.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU @@ -34,6 +35,7 @@ LINK_LIBS PUBLIC MLIRArithmeticDialect MLIRDLTIDialect + MLIRInferIntRangeInterface MLIRIR MLIRMemRefDialect MLIRSideEffectInterfaces diff --git a/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp b/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp @@ -0,0 +1,114 @@
+//===- InferIntRangeInterfaceImpls.cpp - Integer range impls for gpu -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Interfaces/InferIntRangeInterface.h"
+
+#include <cstdint>
+#include <limits>
+
+using namespace mlir;
+using namespace mlir::gpu;
+
+// Maximum grid and block dimensions of all known GPUs are less than 2^32.
+static constexpr uint64_t kMaxDim = std::numeric_limits<uint32_t>::max();
+// Subgroup sizes are no larger than 128.
+static constexpr uint64_t kMaxSubgroupSize = 128;
+
+// Returns the index-typed range whose unsigned bounds are [umin, umax].
+static ConstantIntRanges getIndexRange(uint64_t umin, uint64_t umax) {
+  unsigned width = IndexType::kInternalStorageBitWidth;
+  return ConstantIntRanges::fromUnsigned(APInt(width, umin),
+                                         APInt(width, umax));
+}
+
+// Dimension queries report [1, kMaxDim]; the matching zero-based id queries
+// report [0, kMaxDim - 1]. The operand ranges are ignored.
+void BlockDimOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
+                                   SetIntRangeFn setResultRange) {
+  setResultRange(getResult(), getIndexRange(1, kMaxDim));
+}
+
+void BlockIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
+                                  SetIntRangeFn setResultRange) {
+  setResultRange(getResult(), getIndexRange(0, kMaxDim - 1));
+}
+
+void GridDimOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
+                                  SetIntRangeFn setResultRange) {
+  setResultRange(getResult(), getIndexRange(1, kMaxDim));
+}
+
+void ThreadIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
+                                   SetIntRangeFn setResultRange) {
+  setResultRange(getResult(), getIndexRange(0, kMaxDim - 1));
+}
+
+// A lane id is bounded by the largest subgroup size.
+void LaneIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
+                                 SetIntRangeFn setResultRange) {
+  setResultRange(getResult(), getIndexRange(0, kMaxSubgroupSize - 1));
+}
+
+void SubgroupIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
+                                     SetIntRangeFn setResultRange) {
+  setResultRange(getResult(), getIndexRange(0, kMaxDim - 1));
+}
+
+// Global ids get the widest non-negative range: [0, 2^63 - 1].
+void GlobalIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
+                                   SetIntRangeFn setResultRange) {
+  setResultRange(getResult(),
+                 getIndexRange(0, std::numeric_limits<int64_t>::max()));
+}
+
+void NumSubgroupsOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
+                                       SetIntRangeFn setResultRange) {
+  setResultRange(getResult(), getIndexRange(1, kMaxDim));
+}
+
+void SubgroupSizeOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
+                                       SetIntRangeFn setResultRange) {
+  setResultRange(getResult(), getIndexRange(1, kMaxSubgroupSize));
+}
+
+// Propagates the ranges of the launch's grid/block size operands to the values
+// returned by getGridSize()/getBlockIds() and getBlockSize()/getThreadIds().
+void LaunchOp::inferResultRanges(ArrayRef<ConstantIntRanges> argRanges,
+                                 SetIntRangeFn setResultRange) {
+  // Clamps one size operand's range to [1, kMaxDim] for the dimension value
+  // and derives [0, umax - 1] for the matching id value.
+  auto setRange = [&](const ConstantIntRanges &argRange, Value dimResult,
+                      Value idxResult) {
+    // Skip operands whose range is not expressed at index width.
+    if (argRange.umin().getBitWidth() != IndexType::kInternalStorageBitWidth)
+      return;
+    ConstantIntRanges dimRange =
+        argRange.intersection(getIndexRange(1, kMaxDim));
+    setResultRange(dimResult, dimRange);
+    // NOTE(review): if the intersection can come back with umax == 0, the
+    // unsigned subtraction wraps to 2^64 - 1 -- confirm that is acceptable.
+    ConstantIntRanges idxRange =
+        getIndexRange(0, dimRange.umax().getZExtValue() - 1);
+    setResultRange(idxResult, idxRange);
+  };
+
+  // Operand ranges start with the async dependencies; skip past them so
+  // indices 0-2 are the grid sizes and 3-5 are the block sizes.
+  argRanges = argRanges.drop_front(asyncDependencies().size());
+  KernelDim3 gridDims = getGridSize();
+  KernelDim3 blockIds = getBlockIds();
+  setRange(argRanges[0], gridDims.x, blockIds.x);
+  setRange(argRanges[1], gridDims.y, blockIds.y);
+  setRange(argRanges[2], gridDims.z, blockIds.z);
+  KernelDim3 blockDims = getBlockSize();
+  KernelDim3 threadIds = getThreadIds();
+  setRange(argRanges[3], blockDims.x, threadIds.x);
+  setRange(argRanges[4], blockDims.y, threadIds.y);
+  setRange(argRanges[5], blockDims.z, threadIds.z);
+}
diff --git a/mlir/test/Dialect/GPU/int-range-interface.mlir b/mlir/test/Dialect/GPU/int-range-interface.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/GPU/int-range-interface.mlir @@ -0,0 +1,128 @@ +// RUN: mlir-opt -test-int-range-inference %s | FileCheck %s + +// CHECK-LABEL: func @launch_func +func.func @launch_func(%arg0 : index) { + %0 = test.with_bounds { + umin = 3 : index, umax = 5 : index, + smin = 3 : index, smax = 5 : index + } + %1 = test.with_bounds { + umin = 7 : index, umax = 11 : index, + smin = 7 : index, smax = 11 : index + } + gpu.launch blocks(%block_id_x, %block_id_y, %block_id_z) in (%grid_dim_x = %0, %grid_dim_y = %1, %grid_dim_z = %arg0) +
threads(%thread_id_x, %thread_id_y, %thread_id_z) in (%block_dim_x = %arg0, %block_dim_y = %0, %block_dim_z = %1) { + + // CHECK: test.reflect_bounds {smax = 5 : index, smin = 3 : index, umax = 5 : index, umin = 3 : index} + // CHECK: test.reflect_bounds {smax = 11 : index, smin = 7 : index, umax = 11 : index, umin = 7 : index} + // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index} + %grid_dim_x0 = test.reflect_bounds %grid_dim_x + %grid_dim_y0 = test.reflect_bounds %grid_dim_y + %grid_dim_z0 = test.reflect_bounds %grid_dim_z + + // CHECK: test.reflect_bounds {smax = 4 : index, smin = 0 : index, umax = 4 : index, umin = 0 : index} + // CHECK: test.reflect_bounds {smax = 10 : index, smin = 0 : index, umax = 10 : index, umin = 0 : index} + // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index} + %block_id_x0 = test.reflect_bounds %block_id_x + %block_id_y0 = test.reflect_bounds %block_id_y + %block_id_z0 = test.reflect_bounds %block_id_z + + // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index} + // CHECK: test.reflect_bounds {smax = 5 : index, smin = 3 : index, umax = 5 : index, umin = 3 : index} + // CHECK: test.reflect_bounds {smax = 11 : index, smin = 7 : index, umax = 11 : index, umin = 7 : index} + %block_dim_x0 = test.reflect_bounds %block_dim_x + %block_dim_y0 = test.reflect_bounds %block_dim_y + %block_dim_z0 = test.reflect_bounds %block_dim_z + + // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index} + // CHECK: test.reflect_bounds {smax = 4 : index, smin = 0 : index, umax = 4 : index, umin = 0 : index} + // CHECK: test.reflect_bounds {smax = 10 : index, smin = 0 : index, umax = 10 : index, umin = 0 : index} + %thread_id_x0 = test.reflect_bounds %thread_id_x + %thread_id_y0 = test.reflect_bounds 
%thread_id_y + %thread_id_z0 = test.reflect_bounds %thread_id_z + + gpu.terminator + } + + func.return +} + +// CHECK-LABEL: func @kernel +module attributes {gpu.container_module} { + gpu.module @gpu_module { + llvm.func @kernel() attributes {gpu.kernel} { + + %grid_dim_x = gpu.grid_dim x + %grid_dim_y = gpu.grid_dim y + %grid_dim_z = gpu.grid_dim z + + // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index} + // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index} + // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index} + %grid_dim_x0 = test.reflect_bounds %grid_dim_x + %grid_dim_y0 = test.reflect_bounds %grid_dim_y + %grid_dim_z0 = test.reflect_bounds %grid_dim_z + + %block_id_x = gpu.block_id x + %block_id_y = gpu.block_id y + %block_id_z = gpu.block_id z + + // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index} + // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index} + // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index} + %block_id_x0 = test.reflect_bounds %block_id_x + %block_id_y0 = test.reflect_bounds %block_id_y + %block_id_z0 = test.reflect_bounds %block_id_z + + %block_dim_x = gpu.block_dim x + %block_dim_y = gpu.block_dim y + %block_dim_z = gpu.block_dim z + + // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index} + // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index} + // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index} + %block_dim_x0 = test.reflect_bounds 
%block_dim_x + %block_dim_y0 = test.reflect_bounds %block_dim_y + %block_dim_z0 = test.reflect_bounds %block_dim_z + + %thread_id_x = gpu.thread_id x + %thread_id_y = gpu.thread_id y + %thread_id_z = gpu.thread_id z + + // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index} + // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index} + // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index} + %thread_id_x0 = test.reflect_bounds %thread_id_x + %thread_id_y0 = test.reflect_bounds %thread_id_y + %thread_id_z0 = test.reflect_bounds %thread_id_z + + %global_id_x = gpu.global_id x + %global_id_y = gpu.global_id y + %global_id_z = gpu.global_id z + + // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = 0 : index, umax = 9223372036854775807 : index, umin = 0 : index} + // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = 0 : index, umax = 9223372036854775807 : index, umin = 0 : index} + // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = 0 : index, umax = 9223372036854775807 : index, umin = 0 : index} + %global_id_x0 = test.reflect_bounds %global_id_x + %global_id_y0 = test.reflect_bounds %global_id_y + %global_id_z0 = test.reflect_bounds %global_id_z + + %subgroup_size = gpu.subgroup_size : index + %lane_id = gpu.lane_id + %num_subgroups = gpu.num_subgroups : index + %subgroup_id = gpu.subgroup_id : index + + // CHECK: test.reflect_bounds {smax = 128 : index, smin = 1 : index, umax = 128 : index, umin = 1 : index} + // CHECK: test.reflect_bounds {smax = 127 : index, smin = 0 : index, umax = 127 : index, umin = 0 : index} + // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index} + // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, 
umax = 4294967294 : index, umin = 0 : index} + %subgroup_size0 = test.reflect_bounds %subgroup_size + %lane_id0 = test.reflect_bounds %lane_id + %num_subgroups0 = test.reflect_bounds %num_subgroups + %subgroup_id0 = test.reflect_bounds %subgroup_id + + llvm.return + } + } +} + diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -3490,6 +3490,7 @@ ":DLTIDialectTdFiles", ":DataLayoutInterfacesTdFiles", ":FunctionInterfacesTdFiles", + ":InferIntRangeInterfaceTdFiles", ":LLVMOpsTdFiles", ":OpBaseTdFiles", ":SideEffectInterfacesTdFiles", @@ -3581,6 +3582,7 @@ ":GPUBaseIncGen", ":GPUOpsIncGen", ":IR", + ":InferIntRangeInterface", ":InferTypeOpInterface", ":LLVMDialect", ":MemRefDialect",