Diff 297887

mlir/include/mlir/Dialect/GPU/GPUOps.td

Show First 20 Lines • Show All 285 Lines • ▼ Show 20 Lines	let extraClassDeclaration = [{
LogicalResult verifyBody();		LogicalResult verifyBody();
}];		}];

// let verifier = [{ return ::verifFuncOpy(*this); }];		// let verifier = [{ return ::verifFuncOpy(*this); }];
let printer = [{ printGPUFuncOp(p, *this); }];		let printer = [{ printGPUFuncOp(p, *this); }];
let parser = [{ return parseGPUFuncOp(parser, result); }];		let parser = [{ return parseGPUFuncOp(parser, result); }];
}		}

def GPU_LaunchFuncOp : GPU_Op<"launch_func">,		def GPU_LaunchFuncOp : GPU_Op<"launch_func",
Arguments<(ins SymbolRefAttr:$kernel,		[GPU_AsyncOpInterface, AttrSizedOperandSegments]>,
		Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
		SymbolRefAttr:$kernel,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,		Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,		Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
Variadic<AnyType>:$operands)>,		Variadic<AnyType>:$operands)>,
Results<(outs)> {		Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "Launches a function as a GPU kernel";		let summary = "Launches a function as a GPU kernel";

let description = [{		let description = [{
Launch a kernel function on the specified grid of thread blocks.		Launch a kernel function on the specified grid of thread blocks.
`gpu.launch` operations are lowered to `gpu.launch_func` operations by		`gpu.launch` operations are lowered to `gpu.launch_func` operations by
outlining the kernel body into a function in a dedicated module, which		outlining the kernel body into a function in a dedicated module, which
reflects the separate compilation process. The kernel function is required		reflects the separate compilation process. The kernel function is required
to have the `gpu.kernel` attribute. The module containing the kernel		to have the `gpu.kernel` attribute. The module containing the kernel
function is required to be a gpu.module. And finally, the module containing		function is required to be a gpu.module. And finally, the module containing
the kernel module (which thus cannot be the top-level module) is required		the kernel module (which thus cannot be the top-level module) is required
to have the `gpu.container_module` attribute. The `gpu.launch_func`		to have the `gpu.container_module` attribute. The `gpu.launch_func`
operation has a symbol attribute named `kernel` to identify the fully		operation has a symbol attribute named `kernel` to identify the fully
specified kernel function to launch (both the gpu.module and func).		specified kernel function to launch (both the gpu.module and func).

The operation takes at least six operands, with the first three operands		The `gpu.launch_func` supports async dependencies: the kernel does not start
being grid sizes along x,y,z dimensions and the following three being block		executing until the ops producing those async dependencies have completed.
sizes along x,y,z dimensions. When a lower-dimensional kernel is required,
unused sizes must be explicitly set to `1`. The remaining operands are		By default, the host implicitly blocks until kernel execution has
passed as arguments to the kernel function.		completed. If the `async` keyword is present, the host does not block but
		instead a `!gpu.async.token` is returned. Other async GPU ops can take this
		token as dependency.

		The operation requires at least the grid and block sizes along the x,y,z
		dimensions as arguments. When a lower-dimensional kernel is required,
		unused sizes must be explicitly set to `1`.

		The remaining operands are passed as arguments to the kernel function.

Example:		Example:

```mlir		```mlir
module attributes {gpu.container_module} {		module attributes {gpu.container_module} {

// This module creates a separate compilation unit for the GPU compiler.		// This module creates a separate compilation unit for the GPU compiler.
gpu.module @kernels {		gpu.module @kernels {
Show All 19 Lines	module attributes {gpu.container_module} {
%gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)		%gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)		%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)

"some_op"(%bx, %tx) : (index, index) -> ()		"some_op"(%bx, %tx) : (index, index) -> ()
%42 = load %arg1[%bx] : memref<?xf32, 1>		%42 = load %arg1[%bx] : memref<?xf32, 1>
}		}
}		}

gpu.launch_func		%t0 = gpu.wait async
		%t1 = gpu.launch_func
		async // (Optional) Don't block host, return token.
		[%t0] // (Optional) Execute only after %t0 has completed.
@kernels::@kernel_1 // Kernel module and function.		@kernels::@kernel_1 // Kernel module and function.
grid in (%cst, %cst, %cst) // Grid sizes.		grid in (%cst, %cst, %cst) // Grid sizes.
block in (%cst, %cst, %cst) // Block sizes.		block in (%cst, %cst, %cst) // Block sizes.
(%arg0 : f32, %arg1 : memref<?xf32, 1>) // Arguments passed to the kernel.		(%arg0 : f32, // Arguments passed to the kernel.
		%arg1 : memref<?xf32, 1>)
}		}
```		```
}];		}];

let skipDefaultBuilders = 1;		let skipDefaultBuilders = 1;

let builders = [		let builders = [
OpBuilder<"GPUFuncOp kernelFunc, "		OpBuilder<"GPUFuncOp kernelFunc, "
Show All 31 Lines	let extraClassDeclaration = [{

/// The name of the symbol reference attribute specifying the kernel to launch.		/// The name of the symbol reference attribute specifying the kernel to launch.
static StringRef getKernelAttrName() { return "kernel"; }		static StringRef getKernelAttrName() { return "kernel"; }
}];		}];

let verifier = [{ return ::verify(*this); }];		let verifier = [{ return ::verify(*this); }];
let assemblyFormat = [{		let assemblyFormat = [{
$kernel		$kernel
		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
`grid` `in` custom<Space>() `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)`		`grid` `in` custom<Space>() `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)`
`block` `in` custom<Space>() `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)`		`block` `in` custom<Space>() `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)`
custom<LaunchFuncOperands>($operands, type($operands))		custom<LaunchFuncOperands>($operands, type($operands))
attr-dict		attr-dict
}];		}];
}		}

def GPU_LaunchOp : GPU_Op<"launch">,		def GPU_LaunchOp : GPU_Op<"launch">,
▲ Show 20 Lines • Show All 329 Lines • ▼ Show 20 Lines	let description = [{
launched afterwards. Writes from the device are guaranteed to be visible on		launched afterwards. Writes from the device are guaranteed to be visible on
the host after synchronizing with the device kernel completion.		the host after synchronizing with the device kernel completion.
}];		}];

let assemblyFormat = "$value attr-dict `:` type($value)";		let assemblyFormat = "$value attr-dict `:` type($value)";
let verifier = [{ return success(); }];		let verifier = [{ return success(); }];
}		}

		def GPU_WaitOp : GPU_Op<"wait", [GPU_AsyncOpInterface]> {
		let summary = "Wait for async gpu ops to complete.";
		let description = [{
		This op synchronizes the host or the device with a list of dependent ops.

		If the op contains the `async` keyword, it returns a new async token which
		is synchronized with the op arguments. This new token is merely a shortcut
		to the argument list, and one could replace the uses of the result with the
		arguments for the same effect. The async version of this op is primarily
		used to make each async token have a single use during lowering and
		thereby make forks in async execution explicit. Example usage:

		```mlir
		%t0 = gpu.foo async : !gpu.async.token
		%t1 = gpu.bar async : !gpu.async.token
		%t2 = gpu.wait async [%t0, %t1]
		// gpu.baz doesn't run until gpu.foo and gpu.bar have both completed, just
		// as if the async dependencies were [%t0, %t1].
		%t3 = gpu.baz async [%t2]
		```

		If the op does not contain the `async` keyword, it does not return a new
		async token but blocks until all ops producing the async dependency tokens
		have finished execution. All dependent memory operations are visible to the host
		once this op completes. Example usage:

		```mlir
		%t0 = gpu.foo async : !gpu.async.token
		%t1 = gpu.bar async : !gpu.async.token
		// The gpu.wait op blocks until gpu.foo and gpu.bar have completed.
		gpu.wait [%t0, %t1]
		```
		}];

		let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies);
		let results = (outs Optional<GPU_AsyncToken>:$asyncToken);

		let assemblyFormat = [{
		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies) attr-dict
		}];
		}

#endif // GPU_OPS		#endif // GPU_OPS

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

Show First 20 Lines • Show All 432 Lines • ▼ Show 20 Lines	void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
// Add grid and block sizes as op operands, followed by the data operands.		// Add grid and block sizes as op operands, followed by the data operands.
result.addOperands({gridSize.x, gridSize.y, gridSize.z, blockSize.x,		result.addOperands({gridSize.x, gridSize.y, gridSize.z, blockSize.x,
blockSize.y, blockSize.z});		blockSize.y, blockSize.z});
result.addOperands(kernelOperands);		result.addOperands(kernelOperands);
auto kernelModule = kernelFunc.getParentOfType<GPUModuleOp>();		auto kernelModule = kernelFunc.getParentOfType<GPUModuleOp>();
auto kernelSymbol = builder.getSymbolRefAttr(		auto kernelSymbol = builder.getSymbolRefAttr(
kernelModule.getName(), {builder.getSymbolRefAttr(kernelFunc.getName())});		kernelModule.getName(), {builder.getSymbolRefAttr(kernelFunc.getName())});
result.addAttribute(getKernelAttrName(), kernelSymbol);		result.addAttribute(getKernelAttrName(), kernelSymbol);
		SmallVector<int32_t, 8> segmentSizes(8, 1);
		segmentSizes.front() = 0; // Initially no async dependencies.
		segmentSizes.back() = static_cast<int32_t>(kernelOperands.size());
		result.addAttribute(getOperandSegmentSizeAttr(),
		builder.getI32VectorAttr(segmentSizes));
}		}

unsigned LaunchFuncOp::getNumKernelOperands() {		unsigned LaunchFuncOp::getNumKernelOperands() {
return getNumOperands() - kNumConfigOperands;		return getNumOperands() - asyncDependencies().size() - kNumConfigOperands;
}		}

StringRef LaunchFuncOp::getKernelModuleName() {		StringRef LaunchFuncOp::getKernelModuleName() {
return kernel().getRootReference();		return kernel().getRootReference();
}		}

StringRef LaunchFuncOp::getKernelName() { return kernel().getLeafReference(); }		StringRef LaunchFuncOp::getKernelName() { return kernel().getLeafReference(); }

Value LaunchFuncOp::getKernelOperand(unsigned i) {		Value LaunchFuncOp::getKernelOperand(unsigned i) {
return getOperation()->getOperand(i + kNumConfigOperands);		return getOperand(asyncDependencies().size() + kNumConfigOperands + i);
}		}

KernelDim3 LaunchFuncOp::getGridSizeOperandValues() {		KernelDim3 LaunchFuncOp::getGridSizeOperandValues() {
return KernelDim3{getOperand(0), getOperand(1), getOperand(2)};		auto operands = getOperands().drop_front(asyncDependencies().size());
		return KernelDim3{operands[0], operands[1], operands[2]};
}		}

KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() {		KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() {
return KernelDim3{getOperand(3), getOperand(4), getOperand(5)};		auto operands = getOperands().drop_front(asyncDependencies().size());
		return KernelDim3{operands[3], operands[4], operands[5]};
}		}

static LogicalResult verify(LaunchFuncOp op) {		static LogicalResult verify(LaunchFuncOp op) {
auto module = op.getParentOfType<ModuleOp>();		auto module = op.getParentOfType<ModuleOp>();
if (!module)		if (!module)
return op.emitOpError("expected to belong to a module");		return op.emitOpError("expected to belong to a module");

if (!module.getAttrOfType<UnitAttr>(GPUDialect::getContainerModuleAttrName()))		if (!module.getAttrOfType<UnitAttr>(GPUDialect::getContainerModuleAttrName()))
▲ Show 20 Lines • Show All 364 Lines • ▼ Show 20 Lines	static void print(OpAsmPrinter &p, GPUModuleOp op) {
p << op.getOperationName() << ' ';		p << op.getOperationName() << ' ';
p.printSymbolName(op.getName());		p.printSymbolName(op.getName());
p.printOptionalAttrDictWithKeyword(op.getAttrs(),		p.printOptionalAttrDictWithKeyword(op.getAttrs(),
{SymbolTable::getSymbolAttrName()});		{SymbolTable::getSymbolAttrName()});
p.printRegion(op.getOperation()->getRegion(0), /*printEntryBlockArgs=*/false,		p.printRegion(op.getOperation()->getRegion(0), /*printEntryBlockArgs=*/false,
/*printBlockTerminators=*/false);		/*printBlockTerminators=*/false);
}		}

		static ParseResult parseAsyncDependencies(
		OpAsmParser &parser, Type &asyncTokenType,
		SmallVectorImpl<OpAsmParser::OperandType> &asyncDependencies) {
		auto loc = parser.getCurrentLocation();
		if (succeeded(parser.parseOptionalKeyword("async"))) {
		if (parser.getNumResults() == 0)
		return parser.emitError(loc, "needs to be named when marked 'async'");
		asyncTokenType = parser.getBuilder().getType<AsyncTokenType>();
		}
		return parser.parseOperandList(asyncDependencies,
		OpAsmParser::Delimiter::OptionalSquare);
		}

		static void printAsyncDependencies(OpAsmPrinter &printer, Type asyncTokenType,
		OperandRange asyncDependencies) {
		if (asyncTokenType)
		printer << "async ";
		if (asyncDependencies.empty())
		return;
		printer << "[";
		llvm::interleaveComma(asyncDependencies, printer);
		printer << "]";
		}

#include "mlir/Dialect/GPU/GPUOpInterfaces.cpp.inc"		#include "mlir/Dialect/GPU/GPUOpInterfaces.cpp.inc"

#define GET_OP_CLASSES		#define GET_OP_CLASSES
#include "mlir/Dialect/GPU/GPUOps.cpp.inc"		#include "mlir/Dialect/GPU/GPUOps.cpp.inc"

mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir

Show All 15 Lines	llvm.func @kernel(%arg0: !llvm.i32, %arg1: !llvm.ptr<float>,
llvm.return		llvm.return
}		}
}		}

func @foo(%buffer: memref<?xf32>) {		func @foo(%buffer: memref<?xf32>) {
%c8 = constant 8 : index		%c8 = constant 8 : index
%c32 = constant 32 : i32		%c32 = constant 32 : i32
"gpu.launch_func"(%c8, %c8, %c8, %c8, %c8, %c8, %c32, %buffer) {		"gpu.launch_func"(%c8, %c8, %c8, %c8, %c8, %c8, %c32, %buffer) {
		operand_segment_sizes = dense<[0, 1, 1, 1, 1, 1, 1, 2]> : vector<8xi32>,
		herhutUnsubmitted Not Done Reply Inline Actions It is sad that this gets printed. I assume there currently is no way around this with assemblyFormat. Does this roundtrip when reparsed? I assume `operand_segment_sizes` just gets overwritten with the constructed attribute. herhut: It is sad that this gets printed. I assume there currently is no way around this with…
		csiggAuthorUnsubmitted Done Reply Inline Actions That was the standard format. Changed to custom format now. csigg: That was the standard format. Changed to custom format now.
kernel = @kernel_module::@kernel		kernel = @kernel_module::@kernel
} : (index, index, index, index, index, index, i32, memref<?xf32>) -> ()		} : (index, index, index, index, index, index, i32, memref<?xf32>) -> ()
return		return
}		}

// CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : !llvm.i64		// CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : !llvm.i64
// CHECK: [[ADDRESSOF:%.*]] = llvm.mlir.addressof @[[GLOBAL]]		// CHECK: [[ADDRESSOF:%.*]] = llvm.mlir.addressof @[[GLOBAL]]
// CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : index)		// CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : index)
Show All 19 Lines

mlir/test/Dialect/GPU/invalid.mlir

Show All 31 Lines	func @launch_requires_gpu_return(%sz : index) {
return		return
}		}

// -----		// -----

func @launch_func_too_few_operands(%sz : index) {		func @launch_func_too_few_operands(%sz : index) {
// expected-error@+1 {{expected 6 or more operands}}		// expected-error@+1 {{expected 6 or more operands}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz)		"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz)
		{operand_segment_sizes = dense<[0, 1, 1, 1, 1, 1, 0, 0]> : vector<8xi32>}
: (index, index, index, index, index) -> ()		: (index, index, index, index, index) -> ()
return		return
}		}

// -----		// -----

func @launch_func_missing_parent_module_attribute(%sz : index) {		func @launch_func_missing_parent_module_attribute(%sz : index) {
// expected-error@+1 {{expected the closest surrounding module to have the 'gpu.container_module' attribute}}		// expected-error@+1 {{expected the closest surrounding module to have the 'gpu.container_module' attribute}}
gpu.launch_func @foo::@bar grid in (%sz, %sz, %sz) block in (%sz, %sz, %sz) ()		gpu.launch_func @foo::@bar grid in (%sz, %sz, %sz) block in (%sz, %sz, %sz) ()
return		return
}		}

// -----		// -----

module attributes {gpu.container_module} {		module attributes {gpu.container_module} {
func @launch_func_missing_callee_attribute(%sz : index) {		func @launch_func_missing_callee_attribute(%sz : index) {
// expected-error@+1 {{'gpu.launch_func' op requires attribute 'kernel'}}		// expected-error@+1 {{'gpu.launch_func' op requires attribute 'kernel'}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)		"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
		{operand_segment_sizes = dense<[0, 1, 1, 1, 1, 1, 1, 0]> : vector<8xi32>}
: (index, index, index, index, index, index) -> ()		: (index, index, index, index, index, index) -> ()
return		return
}		}
}		}

// -----		// -----

module attributes {gpu.container_module} {		module attributes {gpu.container_module} {
▲ Show 20 Lines • Show All 357 Lines • ▼ Show 20 Lines	module {
gpu.module @gpu_funcs {		gpu.module @gpu_funcs {
// expected-error @+1 {{'gpu.func' op expected at least 5 arguments to body region}}		// expected-error @+1 {{'gpu.func' op expected at least 5 arguments to body region}}
"gpu.func"() ( {		"gpu.func"() ( {
^bb0(%arg0: f32, %arg1: memref<?xf32>, %arg2: memref<5xf32, 3>, %arg3: memref<5xf32, 5>):		^bb0(%arg0: f32, %arg1: memref<?xf32>, %arg2: memref<5xf32, 3>, %arg3: memref<5xf32, 5>):
"gpu.return"() : () -> ()		"gpu.return"() : () -> ()
} ) {gpu.kernel, sym_name = "kernel_1", type = (f32, memref<?xf32>) -> (), workgroup_attributions = 3: i64} : () -> ()		} ) {gpu.kernel, sym_name = "kernel_1", type = (f32, memref<?xf32>) -> (), workgroup_attributions = 3: i64} : () -> ()
}		}
}		}

		// -----

		func @sync_wait_with_result() {
		// expected-error @+1 {{cannot name an operation with no results}}
		%t = gpu.wait
		}

		// -----

		func @async_wait_without_result() {
		// expected-error @+1 {{custom op 'gpu.wait' needs to be named when marked 'async'}}
		gpu.wait async
		}

mlir/test/Dialect/GPU/ops.mlir

Show First 20 Lines • Show All 67 Lines • ▼ Show 20 Lines	gpu.module @kernels {
}		}
}		}

func @foo() {		func @foo() {
%0 = "op"() : () -> (f32)		%0 = "op"() : () -> (f32)
%1 = "op"() : () -> (memref<?xf32, 1>)		%1 = "op"() : () -> (memref<?xf32, 1>)
// CHECK: %{{.*}} = constant 8		// CHECK: %{{.*}} = constant 8
%cst = constant 8 : index		%cst = constant 8 : index
		%t0 = gpu.wait async

// CHECK: gpu.launch_func @kernels::@kernel_1 grid in (%{{.*}}, %{{.*}}, %{{.*}}) block in (%{{.*}}, %{{.*}}, %{{.*}}) (%{{.*}} : f32, %{{.*}} : memref<?xf32, 1>)		// CHECK: gpu.launch_func @kernels::@kernel_1 grid in (%{{.*}}, %{{.*}}, %{{.*}}) block in (%{{.*}}, %{{.*}}, %{{.*}}) (%{{.*}} : f32, %{{.*}} : memref<?xf32, 1>)
gpu.launch_func @kernels::@kernel_1 grid in (%cst, %cst, %cst) block in (%cst, %cst, %cst) (%0 : f32, %1 : memref<?xf32, 1>)		gpu.launch_func @kernels::@kernel_1 grid in (%cst, %cst, %cst) block in (%cst, %cst, %cst) (%0 : f32, %1 : memref<?xf32, 1>)

// CHECK: gpu.launch_func @kernels::@kernel_2 grid in (%{{.*}}, %{{.*}}, %{{.*}}) block in (%{{.*}}, %{{.*}}, %{{.*}}) (%{{.*}} : f32, %{{.*}} : memref<?xf32, 1>)		// CHECK: gpu.launch_func @kernels::@kernel_2 grid in (%{{.*}}, %{{.*}}, %{{.*}}) block in (%{{.*}}, %{{.*}}, %{{.*}}) (%{{.*}} : f32, %{{.*}} : memref<?xf32, 1>)
gpu.launch_func @kernels::@kernel_2 grid in (%cst, %cst, %cst) block in (%cst, %cst, %cst) (%0 : f32, %1 : memref<?xf32, 1>)		gpu.launch_func @kernels::@kernel_2 grid in (%cst, %cst, %cst) block in (%cst, %cst, %cst) (%0 : f32, %1 : memref<?xf32, 1>)

		// CHECK: %{{.*}} = gpu.launch_func @kernels::@kernel_1 async [%{{.*}}] grid in (%{{.*}}, %{{.*}}, %{{.*}}) block in (%{{.*}}, %{{.*}}, %{{.*}}) (%{{.*}} : f32, %{{.*}} : memref<?xf32, 1>)
		%t1 = gpu.launch_func @kernels::@kernel_1 async [%t0] grid in (%cst, %cst, %cst) block in (%cst, %cst, %cst) (%0 : f32, %1 : memref<?xf32, 1>)

return		return
}		}

gpu.module @gpu_funcs {		gpu.module @gpu_funcs {
// CHECK-LABEL: gpu.func @kernel_1({{.*}}: f32)		// CHECK-LABEL: gpu.func @kernel_1({{.*}}: f32)
// CHECK: workgroup		// CHECK: workgroup
// CHECK: private		// CHECK: private
// CHECK: attributes		// CHECK: attributes
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines	gpu.module @explicit_attributions {
} ) {gpu.kernel, sym_name = "kernel_1", type = (f32, memref<?xf32>) -> (), workgroup_attributions = 1: i64} : () -> ()		} ) {gpu.kernel, sym_name = "kernel_1", type = (f32, memref<?xf32>) -> (), workgroup_attributions = 1: i64} : () -> ()
}		}

func @async_token(%arg0 : !gpu.async.token) -> !gpu.async.token {		func @async_token(%arg0 : !gpu.async.token) -> !gpu.async.token {
// CHECK-LABEL: func @async_token({{.*}}: !gpu.async.token)		// CHECK-LABEL: func @async_token({{.*}}: !gpu.async.token)
// CHECK: return {{.*}} : !gpu.async.token		// CHECK: return {{.*}} : !gpu.async.token
return %arg0 : !gpu.async.token		return %arg0 : !gpu.async.token
}		}

		func @async_wait() {
		// CHECK-LABEL: func @async_wait
		// CHECK: %[[t0:.*]] = gpu.wait async
		%0 = gpu.wait async
		// CHECK: %[[t1:.*]] = gpu.wait async [%[[t0]]]
		%1 = gpu.wait async [%0]
		// CHECK: %{{.*}} = gpu.wait async [%[[t0]], %[[t1]]]
		%2 = gpu.wait async [%0, %1]
		// CHECK: gpu.wait [%[[t0]], %[[t1]]]
		// CHECK-NOT: async
		gpu.wait [%0, %1]
		// CHECK: gpu.wait
		// CHECK-NOT: async
		gpu.wait // Valid, but a no-op.
		return
		}
}		}

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][gpu] Allow gpu.launch_func to be async.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 297887

mlir/include/mlir/Dialect/GPU/GPUOps.td

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir

mlir/test/Dialect/GPU/invalid.mlir

mlir/test/Dialect/GPU/ops.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][gpu] Allow gpu.launch_func to be async.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 297887

mlir/include/mlir/Dialect/GPU/GPUOps.td

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir

mlir/test/Dialect/GPU/invalid.mlir

mlir/test/Dialect/GPU/ops.mlir

[mlir][gpu] Allow gpu.launch_func to be async.
ClosedPublic