diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -431,14 +431,19 @@
   let hasVerifier = 1;
 }
 
-def GPU_LaunchFuncOp : GPU_Op<"launch_func",
-    [GPU_AsyncOpInterface, AttrSizedOperandSegments]>,
+def LaunchIndx : AnyTypeOf<[Index, I32, I64]>;
+
+def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
+      GPU_AsyncOpInterface, AttrSizedOperandSegments,
+      AllTypesMatch<["gridSizeX", "gridSizeY", "gridSizeZ", "blockSizeX",
+                     "blockSizeY", "blockSizeZ"]>]>,
     Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
                SymbolRefAttr:$kernel,
-               Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
-               Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
+               LaunchIndx:$gridSizeX, LaunchIndx:$gridSizeY, LaunchIndx:$gridSizeZ,
+               LaunchIndx:$blockSizeX, LaunchIndx:$blockSizeY, LaunchIndx:$blockSizeZ,
                Optional<I32>:$dynamicSharedMemorySize,
-               Variadic<AnyType>:$kernelOperands)>,
+               Variadic<AnyType>:$kernelOperands,
+               Optional<AnyType>:$asyncObject)>,
     Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
   let summary = "Launches a function as a GPU kernel";
@@ -529,7 +534,11 @@
       "KernelDim3":$blockSize, "Value":$dynamicSharedMemorySize,
       "ValueRange":$kernelOperands, CArg<"Type", "nullptr">:$asyncTokenType,
-      CArg<"ValueRange", "{}">:$asyncDependencies)>
+      CArg<"ValueRange", "{}">:$asyncDependencies)>,
+    OpBuilder<(ins "SymbolRefAttr":$kernel, "KernelDim3":$gridSize,
+      "KernelDim3":$blockSize, "Value":$dynamicSharedMemorySize,
+      "ValueRange":$kernelOperands,
+      CArg<"Value", "nullptr">:$asyncObject)>
   ];
 
   let extraClassDeclaration = [{
@@ -559,9 +568,11 @@
   let assemblyFormat = [{
       custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
+      (`<` $asyncObject^ type($asyncObject) `>`)?
      $kernel
-      `blocks` `in` ` ` `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)`
-      `threads` `in` ` ` `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)`
+      `blocks` `in` ` ` `(` $gridSizeX `,` $gridSizeY `,` $gridSizeZ `)`
+      `threads` `in` ` ` `(` $blockSizeX `,` $blockSizeY `,` $blockSizeZ `)`
+      custom<LaunchDimType>(type($gridSizeX))
       (`dynamic_shared_memory_size` $dynamicSharedMemorySize^)?
       custom<LaunchFuncOperands>($kernelOperands, type($kernelOperands)) attr-dict
   }];
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -268,11 +268,21 @@
       return success();
 
     // Check that `launch_func` refers to a well-formed GPU kernel module.
-    StringAttr kernelModuleName = launchOp.getKernelModuleName();
-    auto kernelModule = module.lookupSymbol<GPUModuleOp>(kernelModuleName);
+    StringAttr kernelContainerName = launchOp.getKernelModuleName();
+    Operation *kernelContainer = module.lookupSymbol(kernelContainerName);
+    if (!kernelContainer)
+      return launchOp.emitOpError()
+             << "kernel container '" << kernelContainerName.getValue()
+             << "' is undefined";
+
+    // If the container is a GPU binary op return success.
+    if (isa<BinaryOp>(kernelContainer))
+      return success();
+
+    auto kernelModule = dyn_cast<GPUModuleOp>(kernelContainer);
     if (!kernelModule)
       return launchOp.emitOpError()
-             << "kernel module '" << kernelModuleName.getValue()
+             << "kernel module '" << kernelContainerName.getValue()
              << "' is undefined";
 
     // Check that `launch_func` refers to a well-formed kernel function.
@@ -930,10 +940,36 @@
       SymbolRefAttr::get(kernelModule.getNameAttr(),
                          {SymbolRefAttr::get(kernelFunc.getNameAttr())});
   result.addAttribute(getKernelAttrName(result.name), kernelSymbol);
-  SmallVector<int32_t, 9> segmentSizes(9, 1);
+  SmallVector<int32_t, 10> segmentSizes(10, 1);
   segmentSizes.front() = asyncDependencies.size();
-  segmentSizes[segmentSizes.size() - 2] = dynamicSharedMemorySize ? 1 : 0;
-  segmentSizes.back() = static_cast<int32_t>(kernelOperands.size());
+  segmentSizes[segmentSizes.size() - 3] = dynamicSharedMemorySize ? 1 : 0;
+  segmentSizes[segmentSizes.size() - 2] =
+      static_cast<int32_t>(kernelOperands.size());
+  segmentSizes.back() = 0;
+  result.addAttribute(getOperandSegmentSizeAttr(),
+                      builder.getDenseI32ArrayAttr(segmentSizes));
+}
+
+void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
+                         SymbolRefAttr kernel, KernelDim3 gridSize,
+                         KernelDim3 getBlockSize, Value dynamicSharedMemorySize,
+                         ValueRange kernelOperands, Value asyncObject) {
+  // Add grid and block sizes as op operands, followed by the data operands.
+  result.addOperands({gridSize.x, gridSize.y, gridSize.z, getBlockSize.x,
+                      getBlockSize.y, getBlockSize.z});
+  if (dynamicSharedMemorySize)
+    result.addOperands(dynamicSharedMemorySize);
+  result.addOperands(kernelOperands);
+  if (asyncObject)
+    result.addOperands(asyncObject);
+  result.addAttribute(getKernelAttrName(result.name), kernel);
+  SmallVector<int32_t, 10> segmentSizes(10, 1);
+  segmentSizes.front() = 0;
+  segmentSizes[segmentSizes.size() - 3] = dynamicSharedMemorySize ? 1 : 0;
+  segmentSizes[segmentSizes.size() - 2] =
+      static_cast<int32_t>(kernelOperands.size());
+
+  segmentSizes.back() = asyncObject ? 1 : 0;
   result.addAttribute(getOperandSegmentSizeAttr(),
                       builder.getDenseI32ArrayAttr(segmentSizes));
 }
@@ -978,6 +1014,22 @@
   return success();
 }
 
+static ParseResult parseLaunchDimType(OpAsmParser &parser, Type &dimTy) {
+  if (succeeded(parser.parseOptionalColon())) {
+    if (parser.parseType(dimTy))
+      return failure();
+  } else {
+    dimTy = IndexType::get(parser.getContext());
+  }
+  return success();
+}
+
+static void printLaunchDimType(OpAsmPrinter &printer, Operation *op,
+                               Type dimTy) {
+  if (!dimTy.isIndex())
+    printer << ": " << dimTy;
+}
+
 static ParseResult parseLaunchFuncOperands(
     OpAsmParser &parser,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &argNames,
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -77,7 +77,7 @@
 module attributes {gpu.container_module} {
   func.func @launch_func_undefined_module(%sz : index) {
-    // expected-error@+1 {{kernel module 'kernels' is undefined}}
+    // expected-error@+1 {{kernel container 'kernels' is undefined}}
     gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
   }
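
Example (not part of the diff): a minimal sketch of the IR the new syntax is
intended to accept, assuming a surrounding `gpu.container_module` that defines
`gpu.module @kernels` with a kernel `@kernel_1(f32)`; function and constant
names are illustrative only.

  func.func @illustrative_launch(%arg0 : f32) {
    %c1 = arith.constant 1 : i64
    %c32 = arith.constant 32 : i64
    // Grid/block sizes may now be i32 or i64 as well as index. The trailing
    // `: i64` is parsed/printed by the new custom<LaunchDimType> directive and
    // is omitted for index, so existing IR round-trips unchanged.
    gpu.launch_func @kernels::@kernel_1 blocks in (%c1, %c1, %c1) threads in (%c32, %c1, %c1) : i64 args(%arg0 : f32)
    return
  }

The optional $asyncObject operand is printed between angle brackets ahead of
the kernel symbol, and the verifier now also accepts a gpu.binary op as the
kernel container (see the GPUDialect.cpp hunk above).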