diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -431,14 +431,18 @@
   let hasVerifier = 1;
 }
 
+// Types accepted for the grid and block size operands of `gpu.launch_func`.
+def LaunchIndx : AnyTypeOf<[Index, I32, I64]>;
+
 def GPU_LaunchFuncOp : GPU_Op<"launch_func",
                               [GPU_AsyncOpInterface, AttrSizedOperandSegments]>,
     Arguments<(ins Variadic:$asyncDependencies,
                SymbolRefAttr:$kernel,
-               Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
-               Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
+               LaunchIndx:$gridSizeX, LaunchIndx:$gridSizeY, LaunchIndx:$gridSizeZ,
+               LaunchIndx:$blockSizeX, LaunchIndx:$blockSizeY, LaunchIndx:$blockSizeZ,
                Optional:$dynamicSharedMemorySize,
-               Variadic:$kernelOperands)>,
+               Variadic:$kernelOperands,
+               Optional:$asyncObject)>,
     Results<(outs Optional:$asyncToken)> {
   let summary = "Launches a function as a GPU kernel";
 
@@ -529,7 +533,11 @@
       "KernelDim3":$blockSize, "Value":$dynamicSharedMemorySize,
       "ValueRange":$kernelOperands,
       CArg<"Type", "nullptr">:$asyncTokenType,
-      CArg<"ValueRange", "{}">:$asyncDependencies)>
+      CArg<"ValueRange", "{}">:$asyncDependencies)>,
+    OpBuilder<(ins "SymbolRefAttr":$kernel, "KernelDim3":$gridSize,
+      "KernelDim3":$blockSize, "Value":$dynamicSharedMemorySize,
+      "ValueRange":$kernelOperands,
+      CArg<"Value", "nullptr">:$asyncObject)>
   ];
 
   let extraClassDeclaration = [{
@@ -559,9 +567,10 @@
 
   let assemblyFormat = [{
       custom(type($asyncToken), $asyncDependencies)
+      (`<` $asyncObject^ type($asyncObject) `>`)?
       $kernel
-      `blocks` `in` ` ` `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)`
-      `threads` `in` ` ` `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)`
+      `blocks` `in` custom($gridSizeX, type($gridSizeX), $gridSizeY, type($gridSizeY), $gridSizeZ, type($gridSizeZ))
+      `threads` `in` custom($blockSizeX, type($blockSizeX), $blockSizeY, type($blockSizeY), $blockSizeZ, type($blockSizeZ))
       (`dynamic_shared_memory_size` $dynamicSharedMemorySize^)?
       custom($kernelOperands, type($kernelOperands)) attr-dict
   }];
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -275,11 +275,21 @@
     return success();
 
   // Check that `launch_func` refers to a well-formed GPU kernel module.
-  StringAttr kernelModuleName = launchOp.getKernelModuleName();
-  auto kernelModule = module.lookupSymbol(kernelModuleName);
+  StringAttr kernelContainerName = launchOp.getKernelModuleName();
+  Operation *kernelContainer = module.lookupSymbol(kernelContainerName);
+  if (!kernelContainer)
+    return launchOp.emitOpError()
+           << "kernel container '" << kernelContainerName.getValue()
+           << "' is undefined";
+
+  // If the container is a GPU binary op return success.
+  if (isa(kernelContainer))
+    return success();
+
+  auto kernelModule = dyn_cast(kernelContainer);
   if (!kernelModule)
     return launchOp.emitOpError()
-           << "kernel module '" << kernelModuleName.getValue()
+           << "kernel module '" << kernelContainerName.getValue()
            << "' is undefined";
 
   // Check that `launch_func` refers to a well-formed kernel function.
@@ -937,10 +947,39 @@
       SymbolRefAttr::get(kernelModule.getNameAttr(),
                          {SymbolRefAttr::get(kernelFunc.getNameAttr())});
   result.addAttribute(getKernelAttrName(result.name), kernelSymbol);
-  SmallVector segmentSizes(9, 1);
+  SmallVector segmentSizes(10, 1);
   segmentSizes.front() = asyncDependencies.size();
-  segmentSizes[segmentSizes.size() - 2] = dynamicSharedMemorySize ? 1 : 0;
-  segmentSizes.back() = static_cast(kernelOperands.size());
+  segmentSizes[segmentSizes.size() - 3] = dynamicSharedMemorySize ? 1 : 0;
+  segmentSizes[segmentSizes.size() - 2] =
+      static_cast(kernelOperands.size());
+  segmentSizes.back() = 0;
+  result.addAttribute(getOperandSegmentSizeAttr(),
+                      builder.getDenseI32ArrayAttr(segmentSizes));
+}
+
+/// Builds a `gpu.launch_func` that names its kernel through `kernel`. It adds
+/// no async dependencies and no result type; `asyncObject` may be null.
+void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
+                         SymbolRefAttr kernel, KernelDim3 gridSize,
+                         KernelDim3 blockSize, Value dynamicSharedMemorySize,
+                         ValueRange kernelOperands, Value asyncObject) {
+  // Add grid and block sizes as op operands, followed by the data operands.
+  result.addOperands({gridSize.x, gridSize.y, gridSize.z, blockSize.x,
+                      blockSize.y, blockSize.z});
+  if (dynamicSharedMemorySize)
+    result.addOperands(dynamicSharedMemorySize);
+  result.addOperands(kernelOperands);
+  if (asyncObject)
+    result.addOperands(asyncObject);
+  result.addAttribute(getKernelAttrName(result.name), kernel);
+  // One segment per operand group: async dependencies, the six grid/block
+  // sizes, dynamic shared memory size, kernel operands and the async object.
+  SmallVector segmentSizes(10, 1);
+  segmentSizes.front() = 0;
+  segmentSizes[segmentSizes.size() - 3] = dynamicSharedMemorySize ? 1 : 0;
+  segmentSizes[segmentSizes.size() - 2] =
+      static_cast(kernelOperands.size());
+  segmentSizes.back() = asyncObject ? 1 : 0;
   result.addAttribute(getOperandSegmentSizeAttr(),
                       builder.getDenseI32ArrayAttr(segmentSizes));
 }
@@ -982,9 +1021,52 @@
                        GPUDialect::getContainerModuleAttrName() +
                        "' attribute");
 
+  KernelDim3 grid = getGridSizeOperandValues();
+  KernelDim3 block = getBlockSizeOperandValues();
+  if (grid.x.getType() != grid.y.getType() ||
+      grid.x.getType() != grid.z.getType() ||
+      grid.x.getType() != block.x.getType() ||
+      grid.x.getType() != block.y.getType() ||
+      grid.x.getType() != block.z.getType())
+    return emitOpError(
+        "expected the grid and block sizes all having the same type");
+  return success();
+}
+
+/// Parses `(x, y, z)` optionally followed by `: type`. The single type applies
+/// to all three sizes and defaults to `index` when it is omitted.
+static ParseResult
+parseDim3(OpAsmParser &parser, OpAsmParser::UnresolvedOperand &sizeX,
+          Type &sizeXTy, OpAsmParser::UnresolvedOperand &sizeY, Type &sizeYTy,
+          OpAsmParser::UnresolvedOperand &sizeZ, Type &sizeZTy) {
+  if (parser.parseLParen() || parser.parseOperand(sizeX) ||
+      parser.parseComma() || parser.parseOperand(sizeY) ||
+      parser.parseComma() || parser.parseOperand(sizeZ) || parser.parseRParen())
+    return failure();
+  // A single optional type covers all three sizes; parsing a type list here
+  // would silently drop every element after the first.
+  if (succeeded(parser.parseOptionalColon())) {
+    if (parser.parseType(sizeXTy))
+      return failure();
+  } else {
+    sizeXTy = IndexType::get(parser.getContext());
+  }
+  sizeYTy = sizeXTy;
+  sizeZTy = sizeXTy;
   return success();
 }
 
+/// Prints `(x, y, z)`, followed by ` : type` only when the type of `sizeX` is
+/// not `index`. The types of `sizeY` and `sizeZ` are not inspected.
+static void printDim3(OpAsmPrinter &printer, Operation *op, Value sizeX,
+                      Type sizeXTy, Value sizeY, Type sizeYTy, Value sizeZ,
+                      Type sizeZTy) {
+  printer << '(' << sizeX << ", " << sizeY << ", " << sizeZ << ')';
+  IndexType indexType = IndexType::get(op->getContext());
+  if (indexType != sizeXTy)
+    printer << " : " << sizeXTy;
+}
+
 static ParseResult parseLaunchFuncOperands(
     OpAsmParser &parser,
     SmallVectorImpl &argNames,
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -77,7 +77,7 @@
 
 module attributes {gpu.container_module} {
   func.func @launch_func_undefined_module(%sz : index) {
-    // expected-error@+1 {{kernel module 'kernels' is undefined}}
+    // expected-error@+1 {{kernel container 'kernels' is undefined}}
     gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
   }