diff --git a/mlir/include/mlir/Dialect/GPU/CMakeLists.txt b/mlir/include/mlir/Dialect/GPU/CMakeLists.txt --- a/mlir/include/mlir/Dialect/GPU/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/GPU/CMakeLists.txt @@ -1,6 +1,11 @@ add_mlir_dialect(GPUOps gpu) add_mlir_doc(GPUOps -gen-op-doc GPUOps Dialects/) +set(LLVM_TARGET_DEFINITIONS GPUBase.td) +mlir_tablegen(GPUOpInterfaces.h.inc -gen-op-interface-decls) +mlir_tablegen(GPUOpInterfaces.cpp.inc -gen-op-interface-defs) +add_public_tablegen_target(MLIRGPUOpInterfacesIncGen) + set(LLVM_TARGET_DEFINITIONS ParallelLoopMapperAttr.td) mlir_tablegen(ParallelLoopMapperAttr.h.inc -gen-struct-attr-decls) mlir_tablegen(ParallelLoopMapperAttr.cpp.inc -gen-struct-attr-defs) diff --git a/mlir/include/mlir/Dialect/GPU/GPUBase.td b/mlir/include/mlir/Dialect/GPU/GPUBase.td --- a/mlir/include/mlir/Dialect/GPU/GPUBase.td +++ b/mlir/include/mlir/Dialect/GPU/GPUBase.td @@ -53,4 +53,53 @@ }]; } +def GPU_AsyncToken : DialectType< + GPU_Dialect, CPred<"$_self.isa<::mlir::gpu::AsyncTokenType>()">, "async token type">, + BuildableType<"mlir::gpu::AsyncTokenType::get($_builder.getContext())">; + +def GPU_AsyncOpInterface : OpInterface<"AsyncOpInterface"> { + let description = [{ + Interface for GPU operations that execute asynchronously on the device. + + GPU operations implementing this interface take a list of dependencies + as `gpu.async.token` arguments and optionally return a `gpu.async.token`. + + The op doesn't start executing until all dependent ops producing the async + dependency tokens have finished executing. + + If the op returns a token, the op merely schedules the execution on the + device and returns immediately, without waiting for the execution to + complete. On the other hand, if the op does not return a token, the op will wait + for the execution to complete. + }]; + let cppNamespace = "::mlir::gpu"; + + let methods = [ + InterfaceMethod<[{ + Query the operands that represent async dependency tokens. 
+ }], + "OperandRange", "getAsyncDependencies", (ins), [{}], [{ + ConcreteOp op = cast(this->getOperation()); + return op.asyncDependencies(); + }] + >, + InterfaceMethod<[{ + Adds a new token to the list of async dependencies. + }], + "void", "addAsyncDependency", (ins "Value":$token), + [{}], [{ + ::mlir::gpu::addAsyncDependency(this->getOperation(), token); + }] + >, + InterfaceMethod<[{ + Query the result that represents the async token to depend on. + }], + "OpResult", "getAsyncToken", (ins), [{}], [{ + ConcreteOp op = cast(this->getOperation()); + return op.asyncToken().template dyn_cast_or_null(); + }] + > + ]; +} + #endif // GPU_BASE diff --git a/mlir/include/mlir/Dialect/GPU/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/GPUDialect.h --- a/mlir/include/mlir/Dialect/GPU/GPUDialect.h +++ b/mlir/include/mlir/Dialect/GPU/GPUDialect.h @@ -14,6 +14,7 @@ #ifndef MLIR_DIALECT_GPU_GPUDIALECT_H #define MLIR_DIALECT_GPU_GPUDIALECT_H +#include "mlir/IR/Builders.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/FunctionSupport.h" #include "mlir/IR/OpDefinition.h" @@ -34,13 +35,24 @@ Value z; }; +class AsyncTokenType + : public Type::TypeBase { +public: + // Used for generic hooks in TypeBase. + using Base::Base; +}; + +// Adds a `gpu.async.token` to the front of the argument list. 
+void addAsyncDependency(Operation *op, Value token); + } // end namespace gpu } // end namespace mlir #include "mlir/Dialect/GPU/GPUOpsDialect.h.inc" +#include "mlir/Dialect/GPU/GPUOpInterfaces.h.inc" + #define GET_OP_CLASSES #include "mlir/Dialect/GPU/GPUOps.h.inc" - #endif // MLIR_DIALECT_GPU_GPUDIALECT_H diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -249,7 +249,7 @@ return getBody().getNumArguments() - getType().getNumInputs() - getNumWorkgroupAttributions(); } - + /// Returns a list of block arguments that correspond to buffers located in /// the private memory. ArrayRef getPrivateAttributions() { @@ -301,7 +301,7 @@ IntOrIndex:$blockSizeY, IntOrIndex:$blockSizeZ, Variadic:$operands)>, Results<(outs)> { - let summary = "Launches a function as a GPU kerneel"; + let summary = "Launches a function as a GPU kernel"; let description = [{ Launch a kernel function on the specified grid of thread blocks. 
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -10,6 +10,7 @@ DEPENDS MLIRGPUOpsIncGen + MLIRGPUOpInterfacesIncGen MLIRGPUPassIncGen MLIRParallelLoopMapperAttrGen MLIRParallelLoopMapperEnumsGen diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -16,12 +16,13 @@ #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" -#include "mlir/IR/Function.h" +#include "mlir/IR/DialectImplementation.h" #include "mlir/IR/FunctionImplementation.h" #include "mlir/IR/Module.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/StandardTypes.h" +#include "llvm/ADT/TypeSwitch.h" using namespace mlir; using namespace mlir::gpu; @@ -36,12 +37,34 @@ } void GPUDialect::initialize() { + addTypes(); addOperations< #define GET_OP_LIST #include "mlir/Dialect/GPU/GPUOps.cpp.inc" >(); } +Type GPUDialect::parseType(DialectAsmParser &parser) const { + // Parse the main keyword for the type. + StringRef keyword; + if (parser.parseKeyword(&keyword)) + return Type(); + MLIRContext *context = getContext(); + + // Handle 'async token' types. 
+ if (keyword == "async.token") + return AsyncTokenType::get(context); + + parser.emitError(parser.getNameLoc(), "unknown gpu type: " + keyword); + return Type(); +} + +void GPUDialect::printType(Type type, DialectAsmPrinter &os) const { + TypeSwitch(type) + .Case([&](Type) { os << "async.token"; }) + .Default([](Type) { llvm_unreachable("unexpected 'gpu' type kind"); }); +} + LogicalResult GPUDialect::verifyOperationAttribute(Operation *op, NamedAttribute attr) { if (!attr.second.isa() || @@ -195,6 +218,26 @@ return success(); } +//===----------------------------------------------------------------------===// +// AsyncOpInterface +//===----------------------------------------------------------------------===// + +void gpu::addAsyncDependency(Operation *op, Value token) { + op->insertOperands(0, {token}); + if (!op->template hasTrait()) + return; + auto attrName = + OpTrait::AttrSizedOperandSegments::getOperandSegmentSizeAttr(); + auto sizeAttr = op->template getAttrOfType(attrName); + if (!sizeAttr) + return; // Async dependencies is the only variadic operand. 
+ SmallVector sizes; + for (auto size : sizeAttr.getIntValues()) + sizes.push_back(size.getSExtValue()); + ++sizes.front(); + op->setAttr(attrName, Builder(op->getContext()).getI32VectorAttr(sizes)); +} + //===----------------------------------------------------------------------===// // LaunchOp //===----------------------------------------------------------------------===// @@ -775,5 +818,7 @@ /*printBlockTerminators=*/false); } +#include "mlir/Dialect/GPU/GPUOpInterfaces.cpp.inc" + #define GET_OP_CLASSES #include "mlir/Dialect/GPU/GPUOps.cpp.inc" diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -143,4 +143,10 @@ "gpu.return"() : () -> () } ) {gpu.kernel, sym_name = "kernel_1", type = (f32, memref) -> (), workgroup_attributions = 1: i64} : () -> () } + + func @async_token(%arg0 : !gpu.async.token) -> !gpu.async.token { + // CHECK-LABEL: func @async_token({{.*}}: !gpu.async.token) + // CHECK: return {{.*}} : !gpu.async.token + return %arg0 : !gpu.async.token + } }