# Patch summary (explanatory preamble; everything from the first "diff --git"
# header onward is the original patch text, unchanged).
#
# Purpose: introduce a `gpu.memcpy` operation in the MLIR GPU dialect, with a
# verifier and tests. Four files are touched:
#
#   1. mlir/include/mlir/Dialect/GPU/GPUOps.td
#      Adds `def GPU_MemcpyOp : GPU_Op<"memcpy", ...>` carrying
#      `GPU_AsyncOpInterface` and `MemoryEffects<[MemRead, MemWrite]>`.
#      Operands are `$asyncDependencies`, `$dst`, `$src`; the single result
#      `$asyncToken` is declared optional. The assembly format prints the
#      async-dependency clause first, then `$dst, $src : type($dst), type($src)`
#      and the attribute dictionary. Verification is delegated to
#      `::verify(*this)`.
#
#   2. mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
#      Adds `#include "mlir/IR/TypeUtilities.h"` (presumably for
#      `getElementTypeOrSelf` / `verifyCompatibleShape` -- confirm) and a
#      file-local `static LogicalResult verify(MemcpyOp op)` that:
#        - emits "arguments have incompatible element type" when
#          `getElementTypeOrSelf` differs between the src and dst types;
#        - emits "arguments have incompatible shape" when
#          `verifyCompatibleShape(srcType, dstType)` fails;
#        - returns `success()` otherwise.
#      The element-type check runs before the shape check, so only the first
#      failing diagnostic is reported.
#
#   3. mlir/test/Dialect/GPU/invalid.mlir
#      Adds two negative tests, one per diagnostic above
#      (`@memcpy_incompatible_type`, `@memcpy_incompatible_shape`).
#
#   4. mlir/test/Dialect/GPU/ops.mlir
#      Adds `@memcpy` with CHECK lines for both the plain form and the
#      `async [%0]` form that yields a token.
#
# NOTE(review): this copy of the patch has been flattened onto two physical
# lines; the break between them falls inside the
# `if (getElementTypeOrSelf(srcType) != getElementTypeOrSelf(dstType))`
# condition. The hunk headers (e.g. `@@ -879,4 +879,39 @@`) still describe the
# original multi-line layout.
#
# NOTE(review): angle-bracketed arguments appear to be missing in several
# places -- `Variadic:$asyncDependencies`, `Arg:$dst`, `Arg:$src`,
# `Optional:$asyncToken`, `custom(type($asyncToken), ...)`,
# `SmallVectorImpl &asyncDependencies`, and the `memref, memref` types in the
# description example and in `@memcpy_incompatible_type`. This looks like
# markup stripping during extraction; restore them from the original change
# before attempting to apply or build -- TODO confirm against upstream.
#
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -879,4 +879,39 @@ }]; } +def GPU_MemcpyOp : GPU_Op<"memcpy", [ + GPU_AsyncOpInterface, MemoryEffects<[MemRead, MemWrite]> + ]> { + + let summary = "GPU memcpy operation"; + + let description = [{ + The `gpu.memcpy` operation copies the content of one memref to another. + + The op does not execute before all async dependencies have finished + executing. + + If the `async` keyword is present, the op is executed asynchronously (i.e. + it does not block until the execution has finished on the device). In + that case, it returns a !gpu.async.token. + + Example: + + ```mlir + %token = gpu.memcpy async [%dep] %dst, %src : memref, memref + ``` + }]; + + let arguments = (ins Variadic:$asyncDependencies, + Arg:$dst, + Arg:$src); + let results = (outs Optional:$asyncToken); + + let assemblyFormat = [{ + custom(type($asyncToken), $asyncDependencies) + $dst`,` $src `:` type($dst)`,` type($src) attr-dict + }]; + let verifier = [{ return ::verify(*this); }]; +} + #endif // GPU_OPS diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -22,6 +22,7 @@ #include "mlir/IR/FunctionImplementation.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" #include "llvm/ADT/TypeSwitch.h" using namespace mlir; @@ -842,6 +843,23 @@ /*printBlockTerminators=*/false); } +//===----------------------------------------------------------------------===// +// GPUMemcpyOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(MemcpyOp op) { + auto srcType = op.src().getType(); + auto dstType = op.dst().getType(); + + if (getElementTypeOrSelf(srcType) != 
getElementTypeOrSelf(dstType)) + return op.emitOpError("arguments have incompatible element type"); + + if (failed(verifyCompatibleShape(srcType, dstType))) + return op.emitOpError("arguments have incompatible shape"); + + return success(); +} + static ParseResult parseAsyncDependencies( OpAsmParser &parser, Type &asyncTokenType, SmallVectorImpl &asyncDependencies) { diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -444,3 +444,17 @@ // expected-error @+1 {{custom op 'gpu.wait' needs to be named when marked 'async'}} gpu.wait async } + +// ----- + +func @memcpy_incompatible_type(%dst : memref, %src : memref) { + // expected-error @+1 {{'gpu.memcpy' op arguments have incompatible element type}} + gpu.memcpy %dst, %src : memref, memref +} + +// ----- + +func @memcpy_incompatible_shape(%dst : memref<7xf32>, %src : memref<9xf32>) { + // expected-error @+1 {{'gpu.memcpy' op arguments have incompatible shape}} + gpu.memcpy %dst, %src : memref<7xf32>, memref<9xf32> +} diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -183,4 +183,15 @@ gpu.wait // Valid, but a no-op. return } + + func @memcpy(%dst : memref<3x7xf32>, %src : memref<3x7xf32, 1>) { + // CHECK-LABEL: func @memcpy + // CHECK: gpu.memcpy {{.*}}, {{.*}} : memref<3x7xf32>, memref<3x7xf32, 1> + gpu.memcpy %dst, %src : memref<3x7xf32>, memref<3x7xf32, 1> + // CHECK: %[[t0:.*]] = gpu.wait async + %0 = gpu.wait async + // CHECK: {{.*}} = gpu.memcpy async [%[[t0]]] {{.*}}, {{.*}} : memref<3x7xf32>, memref<3x7xf32, 1> + %1 = gpu.memcpy async [%0] %dst, %src : memref<3x7xf32>, memref<3x7xf32, 1> + return + } }