Diff 538461

mlir/include/mlir/Dialect/GPU/IR/GPUBase.td

	Show First 20 Lines • Show All 89 Lines • ▼ Show 20 Lines
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// GPU Types.			// GPU Types.
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	def GPU_AsyncToken : DialectType<			def GPU_AsyncToken : DialectType<
	GPU_Dialect, CPred<"::llvm::isa<::mlir::gpu::AsyncTokenType>($_self)">, "async token type">,			GPU_Dialect, CPred<"::llvm::isa<::mlir::gpu::AsyncTokenType>($_self)">, "async token type">,
	BuildableType<"mlir::gpu::AsyncTokenType::get($_builder.getContext())">;			BuildableType<"mlir::gpu::AsyncTokenType::get($_builder.getContext())">;

				// Type constraint for values of the GPU dialect queue type
				// (::mlir::gpu::QueueType). The predicate uses the free-function
				// `::llvm::isa` form, matching the other GPU type constraints in this file.
				def GPU_Queue : DialectType<
				  GPU_Dialect, CPred<"::llvm::isa<::mlir::gpu::QueueType>($_self)">, "queue type">,
				  BuildableType<"mlir::gpu::QueueType::get($_builder.getContext())">;

	// Predicate to check if type is gpu::MMAMatrixType.			// Predicate to check if type is gpu::MMAMatrixType.
	def IsMMAMatrixTypePred : CPred<"::llvm::isa<::mlir::gpu::MMAMatrixType>($_self)">;			def IsMMAMatrixTypePred : CPred<"::llvm::isa<::mlir::gpu::MMAMatrixType>($_self)">;

	def GPU_MMAMatrix : DialectType<			def GPU_MMAMatrix : DialectType<
	GPU_Dialect, IsMMAMatrixTypePred, "MMAMatrix type">;			GPU_Dialect, IsMMAMatrixTypePred, "MMAMatrix type">;

	// Memref type acceptable to gpu.subgroup_mma_{load|store}_matrix ops.			// Memref type acceptable to gpu.subgroup_mma_{load|store}_matrix ops.
	def GPU_MMAMemRef : MemRefOf<[I8, I32, F16, F32, VectorOfRankAndType<[1], [I8, I32, F16, F32]>]>;			def GPU_MMAMemRef : MemRefOf<[I8, I32, F16, F32, VectorOfRankAndType<[1], [I8, I32, F16, F32]>]>;
	▲ Show 20 Lines • Show All 76 Lines • Show Last 20 Lines

mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h

	Show All 40 Lines

	class AsyncTokenType			class AsyncTokenType
	: public Type::TypeBase<AsyncTokenType, Type, TypeStorage> {			: public Type::TypeBase<AsyncTokenType, Type, TypeStorage> {
	public:			public:
	// Used for generic hooks in TypeBase.			// Used for generic hooks in TypeBase.
	using Base::Base;			using Base::Base;
	};			};

				/// GPU dialect queue type. It adds no storage of its own and relies on the
				/// plain `TypeStorage` base.
				class QueueType
				    : public Type::TypeBase<QueueType, Type, TypeStorage> {
				public:
				  // Inherit the TypeBase constructors; they are used by the generic hooks
				  // in TypeBase.
				  using Base::Base;
				};

	/// MMAMatrixType storage and uniquing. Array is uniqued based on its shape			/// MMAMatrixType storage and uniquing. Array is uniqued based on its shape
	/// and type.			/// and type.
	struct MMAMatrixStorageType : public TypeStorage {			struct MMAMatrixStorageType : public TypeStorage {
	MMAMatrixStorageType(unsigned numDims, const int64_t *dimShapes,			MMAMatrixStorageType(unsigned numDims, const int64_t *dimShapes,
	Type elementType, StringRef operand)			Type elementType, StringRef operand)
	: dimShapes(dimShapes), numDims(numDims), elementType(elementType),			: dimShapes(dimShapes), numDims(numDims), elementType(elementType),
	operand(operand) {}			operand(operand) {}

	▲ Show 20 Lines • Show All 143 Lines • Show Last 20 Lines

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Show First 20 Lines • Show All 430 Lines • ▼ Show 20 Lines

def GPU_LaunchFuncOp : GPU_Op<"launch_func",		def GPU_LaunchFuncOp : GPU_Op<"launch_func",
[GPU_AsyncOpInterface, AttrSizedOperandSegments]>,		[GPU_AsyncOpInterface, AttrSizedOperandSegments]>,
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,		Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
SymbolRefAttr:$kernel,		SymbolRefAttr:$kernel,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,		Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,		Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
Optional<I32>:$dynamicSharedMemorySize,		Optional<I32>:$dynamicSharedMemorySize,
Variadic<AnyType>:$kernelOperands)>,		Variadic<AnyType>:$kernelOperands,
		Optional<GPU_Queue>:$queue)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {		Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "Launches a function as a GPU kernel";		let summary = "Launches a function as a GPU kernel";

let description = [{		let description = [{
Launch a kernel function on the specified grid of thread blocks.		Launch a kernel function on the specified grid of thread blocks.
`gpu.launch` operations are lowered to `gpu.launch_func` operations by		`gpu.launch` operations are lowered to `gpu.launch_func` operations by
outlining the kernel body into a function in a dedicated module, which		outlining the kernel body into a function in a dedicated module, which
reflects the separate compilation process. The kernel function is required		reflects the separate compilation process. The kernel function is required
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	module attributes {gpu.container_module} {
%arg1 : memref<?xf32, 1>)		%arg1 : memref<?xf32, 1>)
}		}
```		```
}];		}];

let skipDefaultBuilders = 1;		let skipDefaultBuilders = 1;

let builders = [		let builders = [
OpBuilder<(ins "GPUFuncOp":$kernelFunc, "KernelDim3":$gridSize,		OpBuilder<(ins "GPUFuncOp":$kernelFunc,
"KernelDim3":$blockSize, "Value":$dynamicSharedMemorySize,		"KernelDim3":$gridSize, "KernelDim3":$blockSize,
		"Value":$dynamicSharedMemorySize,
"ValueRange":$kernelOperands,		"ValueRange":$kernelOperands,
CArg<"Type", "nullptr">:$asyncTokenType,		CArg<"Type", "nullptr">:$asyncTokenType,
CArg<"ValueRange", "{}">:$asyncDependencies)>		CArg<"ValueRange", "{}">:$asyncDependencies,
];		CArg<"Value", "Value{}">:$queue)>];

let extraClassDeclaration = [{		let extraClassDeclaration = [{
/// The name of the kernel's containing module.		/// The name of the kernel's containing module.
StringAttr getKernelModuleName();		StringAttr getKernelModuleName();

/// The name of the kernel.		/// The name of the kernel.
StringAttr getKernelName();		StringAttr getKernelName();

Show All 15 Lines	friend LogicalResult GPUDialect::verifyOperationAttribute(Operation *,
NamedAttribute);		NamedAttribute);
}];		}];

let assemblyFormat = [{		let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$kernel		$kernel
`blocks` `in` ` ` `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)`		`blocks` `in` ` ` `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)`
`threads` `in` ` ` `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)`		`threads` `in` ` ` `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)`
		(`queue` $queue^)?
(`dynamic_shared_memory_size` $dynamicSharedMemorySize^)?		(`dynamic_shared_memory_size` $dynamicSharedMemorySize^)?
custom<LaunchFuncOperands>($kernelOperands, type($kernelOperands)) attr-dict		custom<LaunchFuncOperands>($kernelOperands, type($kernelOperands)) attr-dict
}];		}];
let hasVerifier = 1;		let hasVerifier = 1;
}		}

def GPU_LaunchOp : GPU_Op<"launch", [		def GPU_LaunchOp : GPU_Op<"launch", [
AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface,		AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface,
▲ Show 20 Lines • Show All 501 Lines • ▼ Show 20 Lines	let description = [{

This operation may not be supported in every environment, there is not yet a		This operation may not be supported in every environment, there is not yet a
way to check at runtime whether this feature is supported.		way to check at runtime whether this feature is supported.
}];		}];

let assemblyFormat = "$value attr-dict `:` type($value)";		let assemblyFormat = "$value attr-dict `:` type($value)";
}		}

def GPU_WaitOp : GPU_Op<"wait", [GPU_AsyncOpInterface]> {		def GPU_WaitOp : GPU_Op<"wait", [GPU_AsyncOpInterface, AttrSizedOperandSegments]> {
let summary = "Wait for async gpu ops to complete.";		let summary = "Wait for async gpu ops to complete.";
let description = [{		let description = [{
This op synchronizes the host or the device with a list of dependent ops.		This op synchronizes the host or the device with a list of dependent ops.

If the op contains the `async` keyword, it returns a new async token which		If the op contains the `async` keyword, it returns a new async token which
is synchronized with the op arguments. This new token is merely a shortcut		is synchronized with the op arguments. This new token is merely a shortcut
to the argument list, and one could replace the uses of the result with the		to the argument list, and one could replace the uses of the result with the
arguments for the same effect. The async version of this op is primarily		arguments for the same effect. The async version of this op is primarily
Show All 17 Lines	let description = [{
```mlir		```mlir
%t0 = gpu.foo async : !gpu.async.token		%t0 = gpu.foo async : !gpu.async.token
%t1 = gpu.bar async : !gpu.async.token		%t1 = gpu.bar async : !gpu.async.token
// The gpu.wait op blocks until gpu.foo and gpu.bar have completed.		// The gpu.wait op blocks until gpu.foo and gpu.bar have completed.
gpu.wait [%t0, %t1]		gpu.wait [%t0, %t1]
```		```
}];		}];

let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies);		let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
		Optional<GPU_Queue>:$queue);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);		let results = (outs Optional<GPU_AsyncToken>:$asyncToken);

		let skipDefaultBuilders = 1;

		let builders = [
		OpBuilder<(ins
		CArg<"Type", "nullptr">:$asyncTokenType,
		CArg<"ValueRange", "{}">:$asyncDependencies,
		CArg<"Value", "Value{}">:$queue)>];

let assemblyFormat = [{		let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies) attr-dict		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies) attr-dict
		(`queue` $queue^)?
}];		}];

let hasCanonicalizer = 1;		let hasCanonicalizer = 1;
}		}

def GPU_AllocOp : GPU_Op<"alloc", [		def GPU_AllocOp : GPU_Op<"alloc", [
GPU_AsyncOpInterface,		GPU_AsyncOpInterface,
AttrSizedOperandSegments		AttrSizedOperandSegments
Show All 18 Lines	let description = [{

```mlir		```mlir
%memref, %token = gpu.alloc async [%dep] host_shared (%width) : memref<64x?xf32, 1>		%memref, %token = gpu.alloc async [%dep] host_shared (%width) : memref<64x?xf32, 1>
```		```
}];		}];

let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,		let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Variadic<Index>:$dynamicSizes, Variadic<Index>:$symbolOperands,		Variadic<Index>:$dynamicSizes, Variadic<Index>:$symbolOperands,
UnitAttr:$hostShared);		UnitAttr:$hostShared, Optional<GPU_Queue>:$queue);
let results = (outs Res<AnyMemRef, "", [MemAlloc]>:$memref,		let results = (outs Res<AnyMemRef, "", [MemAlloc]>:$memref,
Optional<GPU_AsyncToken>:$asyncToken);		Optional<GPU_AsyncToken>:$asyncToken);

let extraClassDeclaration = [{		let extraClassDeclaration = [{
MemRefType getType() { return ::llvm::cast<MemRefType>(getMemref().getType()); }		MemRefType getType() { return ::llvm::cast<MemRefType>(getMemref().getType()); }
}];		}];

		let skipDefaultBuilders = 1;

		let builders = [
		OpBuilder<(ins
		"Type":$memref,
		"Type":$asyncTokenType,
		"ValueRange":$asyncDependencies,
		"ValueRange":$dynamicSizes,
		"ValueRange":$symbolOperands,
		CArg<"bool", "false">:$hostShared,
		CArg<"Value", "Value{}">:$queue)>];

let assemblyFormat = [{		let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies) (` ` `host_shared` $hostShared^)? ` `		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
`(` $dynamicSizes `)` (`` `[` $symbolOperands^ `]`)? attr-dict `:` type($memref)		(` ` `host_shared` $hostShared^)? ` `
		`(` $dynamicSizes `)` (`` `[` $symbolOperands^ `]`)?
		(` ` `queue` $queue^)? ` ` attr-dict `:` type($memref)
}];		}];

let hasVerifier = 1;		let hasVerifier = 1;
let hasCanonicalizer = 1;		let hasCanonicalizer = 1;
}		}

def GPU_DeallocOp : GPU_Op<"dealloc", [GPU_AsyncOpInterface]> {		def GPU_DeallocOp : GPU_Op<"dealloc", [GPU_AsyncOpInterface, AttrSizedOperandSegments]> {

let summary = "GPU memory deallocation operation";		let summary = "GPU memory deallocation operation";

let description = [{		let description = [{
The `gpu.dealloc` operation frees the region of memory referenced by a		The `gpu.dealloc` operation frees the region of memory referenced by a
memref which was originally created by the `gpu.alloc` operation. It is		memref which was originally created by the `gpu.alloc` operation. It is
similar to the `memref.dealloc` op, but supports asynchronous GPU execution.		similar to the `memref.dealloc` op, but supports asynchronous GPU execution.

The op does not execute before all async dependencies have finished		The op does not execute before all async dependencies have finished
executing.		executing.

If the `async` keyword is present, the op is executed asynchronously (i.e.		If the `async` keyword is present, the op is executed asynchronously (i.e.
it does not block until the execution has finished on the device). In		it does not block until the execution has finished on the device). In
that case, it returns a !gpu.async.token.		that case, it returns a !gpu.async.token.

Example:		Example:

```mlir		```mlir
%token = gpu.dealloc async [%dep] %memref : memref<8x64xf32, 1>		%token = gpu.dealloc async [%dep] %memref : memref<8x64xf32, 1>
```		```
}];		}];

let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,		let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Arg<AnyMemRef, "", [MemFree]>:$memref);		Arg<AnyMemRef, "", [MemFree]>:$memref,
		Optional<GPU_Queue>:$queue);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);		let results = (outs Optional<GPU_AsyncToken>:$asyncToken);

		let skipDefaultBuilders = 1;

		let builders = [
		OpBuilder<(ins
		"Type":$asyncTokenType,
		"ValueRange":$asyncDependencies,
		"Value":$memref,
		CArg<"Value", "Value{}">:$queue)>];


let assemblyFormat = [{		let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$memref attr-dict `:` type($memref)		$memref
		(`queue` $queue^)? attr-dict `:` type($memref)
}];		}];
}		}

def GPU_MemcpyOp : GPU_Op<"memcpy", [GPU_AsyncOpInterface]> {		def GPU_MemcpyOp : GPU_Op<"memcpy", [GPU_AsyncOpInterface, AttrSizedOperandSegments]> {

let summary = "GPU memcpy operation";		let summary = "GPU memcpy operation";

let description = [{		let description = [{
The `gpu.memcpy` operation copies the content of one memref to another.		The `gpu.memcpy` operation copies the content of one memref to another.

The op does not execute before all async dependencies have finished		The op does not execute before all async dependencies have finished
executing.		executing.

If the `async` keyword is present, the op is executed asynchronously (i.e.		If the `async` keyword is present, the op is executed asynchronously (i.e.
it does not block until the execution has finished on the device). In		it does not block until the execution has finished on the device). In
that case, it returns a !gpu.async.token.		that case, it returns a !gpu.async.token.

Example:		Example:

```mlir		```mlir
%token = gpu.memcpy async [%dep] %dst, %src : memref<?xf32, 1>, memref<?xf32>		%token = gpu.memcpy async [%dep] %dst, %src : memref<?xf32, 1>, memref<?xf32>
```		```
}];		}];

let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,		let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Arg<AnyMemRef, "", [MemWrite]>:$dst,		Arg<AnyMemRef, "", [MemWrite]>:$dst,
Arg<AnyMemRef, "", [MemRead]>:$src);		Arg<AnyMemRef, "", [MemRead]>:$src,
		Optional<GPU_Queue>:$queue);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);		let results = (outs Optional<GPU_AsyncToken>:$asyncToken);

		let skipDefaultBuilders = 1;

		let builders = [
		OpBuilder<(ins
		"Type":$asyncTokenType,
		"ValueRange":$asyncDependencies,
		"Value":$dst,
		"Value":$src,
		CArg<"Value", "Value{}">:$queue)>];

let assemblyFormat = [{		let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$dst`,` $src `:` type($dst)`,` type($src) attr-dict		$dst`,` $src (`queue` $queue^)? `:` type($dst)`,` type($src) attr-dict

}];		}];
let hasFolder = 1;		let hasFolder = 1;
let hasVerifier = 1;		let hasVerifier = 1;
let hasCanonicalizer = 1;		let hasCanonicalizer = 1;
}		}

def GPU_MemsetOp : GPU_Op<"memset",		def GPU_MemsetOp : GPU_Op<"memset",
[GPU_AsyncOpInterface, AllElementTypesMatch<["dst", "value"]>]> {		[GPU_AsyncOpInterface, AttrSizedOperandSegments,
		AllElementTypesMatch<["dst", "value"]>]> {

let summary = "GPU memset operation";		let summary = "GPU memset operation";

let description = [{		let description = [{
The `gpu.memset` operation sets the content of memref to a scalar value.		The `gpu.memset` operation sets the content of memref to a scalar value.

The op does not execute before all async dependencies have finished		The op does not execute before all async dependencies have finished
executing.		executing.

If the `async` keyword is present, the op is executed asynchronously (i.e.		If the `async` keyword is present, the op is executed asynchronously (i.e.
it does not block until the execution has finished on the device). In		it does not block until the execution has finished on the device). In
that case, it returns a !gpu.async.token.		that case, it returns a !gpu.async.token.

Example:		Example:

```mlir		```mlir
%token = gpu.memset async [%dep] %dst, %value : memref<?xf32, 1>, f32		%token = gpu.memset async [%dep] %dst, %value : memref<?xf32, 1>, f32
```		```
}];		}];

let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,		let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Arg<AnyMemRef, "", [MemWrite]>:$dst,		Arg<AnyMemRef, "", [MemWrite]>:$dst,
Arg<AnyType, "">:$value);		Arg<AnyType, "">:$value,
		Optional<GPU_Queue>:$queue);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);		let results = (outs Optional<GPU_AsyncToken>:$asyncToken);

let assemblyFormat = [{		let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$dst`,` $value `:` type($dst)`,` type($value) attr-dict		$dst`,` $value (`queue` $queue^)? `:` type($dst)`,` type($value) attr-dict
}];		}];
let hasFolder = 1;		let hasFolder = 1;
}		}

def GPU_SetDefaultDeviceOp : GPU_Op<"set_default_device",		def GPU_SetDefaultDeviceOp : GPU_Op<"set_default_device",
[MemoryEffects<[MemWrite]>]>,		[MemoryEffects<[MemWrite]>]>,
Arguments<(ins I32:$devIndex)> {		Arguments<(ins I32:$devIndex)> {
let summary = "Set default GPU for operations after this by index";		let summary = "Set default GPU for operations after this by index";
▲ Show 20 Lines • Show All 457 Lines • ▼ Show 20 Lines	let description = [{
%spmat, %token = gpu.create_2to4_spmat async [%dep] %rows, %cols, %mem : memref<?xf64>		%spmat, %token = gpu.create_2to4_spmat async [%dep] %rows, %cols, %mem : memref<?xf64>
```		```
}];		}];

let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,		let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Index:$rows,		Index:$rows,
Index:$cols,		Index:$cols,
AnyMemRef:$memref);		AnyMemRef:$memref);
let results = (outs Res<GPU_SparseSpMatHandle>:$spMat,		let results = (outs Res<GPU_SparseSpMatHandle>:$spMat,
Optional<GPU_AsyncToken>:$asyncToken);		Optional<GPU_AsyncToken>:$asyncToken);

let assemblyFormat = [{		let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$rows `,` $cols `,` $memref attr-dict `:` type($memref)		$rows `,` $cols `,` $memref attr-dict `:` type($memref)
}];		}];
}		}

▲ Show 20 Lines • Show All 356 Lines • ▼ Show 20 Lines	def GPU_SDDMMOp : GPU_Op<"sddmm", [GPU_AsyncOpInterface]> {
];		];

let assemblyFormat = [{		let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)		custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$dnmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $spmatC `,` $buffer attr-dict `:` type($buffer) `into` $computeType		$dnmatA (`{` $modeA^ `}`)? `,` $dnmatB (`{` $modeB^ `}`)? `,` $spmatC `,` $buffer attr-dict `:` type($buffer) `into` $computeType
}];		}];
}		}

		def GPU_CreateQueueOp : GPU_Op<"create_queue"> {
		  let summary = "Create a GPU queue";

		  let description = [{
		    The `gpu.create_queue` op takes an optional attribute `desc` as input and
		    returns a new queue. The meaning of `desc` is implementation specific,
		    e.g. it can be a device name. The returned queue is then used for
		    launching/queuing kernels on the GPU.

		    Example with a description:

		    ```mlir
		    %queue = gpu.create_queue "test"
		    ```

		    Example without a description:

		    ```mlir
		    %queue = gpu.create_queue
		    ```
		  }];

		  // The builder is declared by hand below, so the default ones are skipped.
		  let skipDefaultBuilders = 1;

		  let arguments = (ins OptionalAttr<AnyAttr>:$desc);
		  let results = (outs GPU_Queue:$queue);
		  let builders = [OpBuilder<(ins CArg<"Attribute", "{}">:$desc)>];

		  let assemblyFormat = "($desc^)? attr-dict";
		}

		def GPU_DestroyQueueOp : GPU_Op<"destroy_queue"> {
		  let summary = "Destroy a GPU queue";

		  let description = [{
		    The `gpu.destroy_queue` op destroys the GPU queue.

		    Example:

		    ```mlir
		    gpu.destroy_queue %queue
		    ```
		  }];

		  let arguments = (ins GPU_Queue:$queue);

		  let assemblyFormat = "attr-dict $queue";
		}

#endif // GPU_OPS		#endif // GPU_OPS

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

Show First 20 Lines • Show All 142 Lines • ▼ Show 20 Lines
};		};
} // namespace		} // namespace

void GPUDialect::initialize() {		void GPUDialect::initialize() {
addTypes<AsyncTokenType>();		addTypes<AsyncTokenType>();
addTypes<MMAMatrixType>();		addTypes<MMAMatrixType>();
addTypes<SparseDnTensorHandleType>();		addTypes<SparseDnTensorHandleType>();
addTypes<SparseSpMatHandleType>();		addTypes<SparseSpMatHandleType>();
		addTypes<QueueType>();
addOperations<		addOperations<
#define GET_OP_LIST		#define GET_OP_LIST
#include "mlir/Dialect/GPU/IR/GPUOps.cpp.inc"		#include "mlir/Dialect/GPU/IR/GPUOps.cpp.inc"
>();		>();
addAttributes<		addAttributes<
#define GET_ATTRDEF_LIST		#define GET_ATTRDEF_LIST
#include "mlir/Dialect/GPU/IR/GPUOpsAttributes.cpp.inc"		#include "mlir/Dialect/GPU/IR/GPUOpsAttributes.cpp.inc"
>();		>();
Show All 17 Lines	Type GPUDialect::parseType(DialectAsmParser &parser) const {
if (parser.parseKeyword(&keyword))		if (parser.parseKeyword(&keyword))
return Type();		return Type();
MLIRContext *context = getContext();		MLIRContext *context = getContext();

// Handle 'async token' types.		// Handle 'async token' types.
if (keyword == "async.token")		if (keyword == "async.token")
return AsyncTokenType::get(context);		return AsyncTokenType::get(context);

		if (keyword == "queue")
		return QueueType::get(context);

if (keyword == "mma_matrix") {		if (keyword == "mma_matrix") {
SMLoc beginLoc = parser.getNameLoc();		SMLoc beginLoc = parser.getNameLoc();

// Parse '<'.		// Parse '<'.
if (parser.parseLess())		if (parser.parseLess())
return nullptr;		return nullptr;

// Parse the size and elementType.		// Parse the size and elementType.
Show All 34 Lines
void GPUDialect::printType(Type type, DialectAsmPrinter &os) const {		void GPUDialect::printType(Type type, DialectAsmPrinter &os) const {
TypeSwitch<Type>(type)		TypeSwitch<Type>(type)
.Case<AsyncTokenType>([&](Type) { os << "async.token"; })		.Case<AsyncTokenType>([&](Type) { os << "async.token"; })
.Case<SparseDnTensorHandleType>([&](Type) {		.Case<SparseDnTensorHandleType>([&](Type) {
os << getSparseHandleKeyword(SparseHandleKind::DnTensor);		os << getSparseHandleKeyword(SparseHandleKind::DnTensor);
})		})
.Case<SparseSpMatHandleType>(		.Case<SparseSpMatHandleType>(
[&](Type) { os << getSparseHandleKeyword(SparseHandleKind::SpMat); })		[&](Type) { os << getSparseHandleKeyword(SparseHandleKind::SpMat); })
		.Case<QueueType>([&](Type) { os << "queue"; })
.Case<MMAMatrixType>([&](MMAMatrixType fragTy) {		.Case<MMAMatrixType>([&](MMAMatrixType fragTy) {
os << "mma_matrix<";		os << "mma_matrix<";
auto shape = fragTy.getShape();		auto shape = fragTy.getShape();
for (auto dim = shape.begin(), e = shape.end() - 1; dim != e; ++dim)		for (auto dim = shape.begin(), e = shape.end() - 1; dim != e; ++dim)
os << *dim << 'x';		os << *dim << 'x';
os << shape.back() << 'x' << fragTy.getElementType();		os << shape.back() << 'x' << fragTy.getElementType();
os << ", \"" << fragTy.getOperand() << "\"" << '>';		os << ", \"" << fragTy.getOperand() << "\"" << '>';
})		})
▲ Show 20 Lines • Show All 660 Lines • ▼ Show 20 Lines
/// private memory.		/// private memory.
BlockArgument LaunchOp::addPrivateAttribution(Type type, Location loc) {		BlockArgument LaunchOp::addPrivateAttribution(Type type, Location loc) {
// Buffers on the private memory always come after buffers on the workgroup		// Buffers on the private memory always come after buffers on the workgroup
// memory.		// memory.
return getBody().addArgument(type, loc);		return getBody().addArgument(type, loc);
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
		// CreateQueueOp
		//===----------------------------------------------------------------------===//

		/// Builds a `gpu.create_queue` op whose single result has type `QueueType`.
		/// `desc` may be null, in which case no `desc` property is set.
		void CreateQueueOp::build(OpBuilder &odsBuilder, OperationState &odsState,
		                          /*optional*/ Attribute desc) {
		  // Only touch the properties when a description was actually supplied.
		  if (desc)
		    odsState.getOrAddProperties<Properties>().desc = desc;

		  odsState.addTypes(QueueType::get(odsBuilder.getContext()));
		}

		//===----------------------------------------------------------------------===//
// LaunchFuncOp		// LaunchFuncOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,		void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
GPUFuncOp kernelFunc, KernelDim3 gridSize,		GPUFuncOp kernelFunc, KernelDim3 gridSize,
KernelDim3 getBlockSize, Value dynamicSharedMemorySize,		KernelDim3 getBlockSize, Value dynamicSharedMemorySize,
ValueRange kernelOperands, Type asyncTokenType,		ValueRange kernelOperands, Type asyncTokenType,
ValueRange asyncDependencies) {		ValueRange asyncDependencies, Value queue) {
result.addOperands(asyncDependencies);		result.addOperands(asyncDependencies);
if (asyncTokenType)		if (asyncTokenType)
result.types.push_back(builder.getType<AsyncTokenType>());		result.types.push_back(builder.getType<AsyncTokenType>());

// Add grid and block sizes as op operands, followed by the data operands.		// Add grid and block sizes as op operands, followed by the data operands.
result.addOperands({gridSize.x, gridSize.y, gridSize.z, getBlockSize.x,		result.addOperands({gridSize.x, gridSize.y, gridSize.z, getBlockSize.x,
getBlockSize.y, getBlockSize.z});		getBlockSize.y, getBlockSize.z});
if (dynamicSharedMemorySize)		if (dynamicSharedMemorySize)
result.addOperands(dynamicSharedMemorySize);		result.addOperands(dynamicSharedMemorySize);
result.addOperands(kernelOperands);		result.addOperands(kernelOperands);
auto kernelModule = kernelFunc->getParentOfType<GPUModuleOp>();		auto kernelModule = kernelFunc->getParentOfType<GPUModuleOp>();
auto kernelSymbol =		auto kernelSymbol =
SymbolRefAttr::get(kernelModule.getNameAttr(),		SymbolRefAttr::get(kernelModule.getNameAttr(),
{SymbolRefAttr::get(kernelFunc.getNameAttr())});		{SymbolRefAttr::get(kernelFunc.getNameAttr())});
result.addAttribute(getKernelAttrName(result.name), kernelSymbol);		result.addAttribute(getKernelAttrName(result.name), kernelSymbol);
SmallVector<int32_t, 9> segmentSizes(9, 1);
		if (queue)
		result.addOperands(queue);

		SmallVector<int32_t, 10> segmentSizes(10, 1);
segmentSizes.front() = asyncDependencies.size();		segmentSizes.front() = asyncDependencies.size();
segmentSizes[segmentSizes.size() - 2] = dynamicSharedMemorySize ? 1 : 0;		segmentSizes[segmentSizes.size() - 3] = dynamicSharedMemorySize ? 1 : 0;
segmentSizes.back() = static_cast<int32_t>(kernelOperands.size());		segmentSizes[segmentSizes.size() - 2] =
		static_cast<int32_t>(kernelOperands.size());
		segmentSizes.back() = queue ? 1 : 0;
result.addAttribute(getOperandSegmentSizeAttr(),		result.addAttribute(getOperandSegmentSizeAttr(),
builder.getDenseI32ArrayAttr(segmentSizes));		builder.getDenseI32ArrayAttr(segmentSizes));
}		}

StringAttr LaunchFuncOp::getKernelModuleName() {		StringAttr LaunchFuncOp::getKernelModuleName() {
return getKernel().getRootReference();		return getKernel().getRootReference();
}		}

▲ Show 20 Lines • Show All 592 Lines • ▼ Show 20 Lines

} // end anonymous namespace		} // end anonymous namespace

void MemcpyOp::getCanonicalizationPatterns(RewritePatternSet &results,		void MemcpyOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {		MLIRContext *context) {
results.add<EraseTrivialCopyOp>(context);		results.add<EraseTrivialCopyOp>(context);
}		}

		/// Builds a `gpu.memcpy` op. Operands are appended in the order
		/// async dependencies, `dst`, `src`, then `queue` if it is non-null, and the
		/// operand segment sizes are recorded in the same order. A result type is
		/// added only when `asyncToken` is non-null.
		void MemcpyOp::build(OpBuilder &odsBuilder, OperationState &odsState,
		                     /*optional*/ Type asyncToken,
		                     ValueRange asyncDependencies, Value dst, Value src,
		                     /*optional*/ Value queue) {
		  odsState.addOperands(asyncDependencies);
		  odsState.addOperands(dst);
		  odsState.addOperands(src);
		  if (queue)
		    odsState.addOperands(queue);
		  // Segment layout: [asyncDependencies..., dst, src, queue?].
		  odsState.getOrAddProperties<Properties>().operand_segment_sizes =
		      odsBuilder.getDenseI32ArrayAttr(
		          {static_cast<int32_t>(asyncDependencies.size()), 1, 1,
		           (queue ? 1 : 0)});
		  if (asyncToken)
		    odsState.addTypes(asyncToken);
		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// GPU_SubgroupMmaLoadMatrixOp		// GPU_SubgroupMmaLoadMatrixOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

/// Return true if the last dimension of the MemRefType has unit stride. Also		/// Return true if the last dimension of the MemRefType has unit stride. Also
/// return true for memrefs with no strides.		/// return true for memrefs with no strides.
static bool isLastMemrefDimUnitStride(MemRefType type) {		static bool isLastMemrefDimUnitStride(MemRefType type) {
int64_t offset;		int64_t offset;
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines	LogicalResult MemsetOp::fold(FoldAdaptor adaptor,
SmallVectorImpl<::mlir::OpFoldResult> &results) {		SmallVectorImpl<::mlir::OpFoldResult> &results) {
return memref::foldMemRefCast(*this);		return memref::foldMemRefCast(*this);
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// GPU_WaitOp		// GPU_WaitOp
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

		/// Builds a `gpu.wait` op. The async dependencies are added first, followed
		/// by `queue` when it is non-null. If `asyncTokenType` is non-null the op
		/// gets a single `AsyncTokenType` result.
		void WaitOp::build(OpBuilder &builder, OperationState &result,
		                   Type asyncTokenType, ValueRange asyncDependencies,
		                   Value queue) {
		  result.addOperands(asyncDependencies);
		  if (asyncTokenType)
		    result.types.push_back(builder.getType<AsyncTokenType>());

		  if (queue)
		    result.addOperands(queue);

		  // Segment layout: [asyncDependencies..., queue?].
		  const int32_t numDependencies = asyncDependencies.size();
		  const int32_t numQueues = queue ? 1 : 0;
		  result.addAttribute(
		      getOperandSegmentSizeAttr(),
		      builder.getDenseI32ArrayAttr({numDependencies, numQueues}));
		}

namespace {		namespace {

/// Remove gpu.wait op use of gpu.wait op def without async dependencies.		/// Remove gpu.wait op use of gpu.wait op def without async dependencies.
/// %t = gpu.wait async [] // No async dependencies.		/// %t = gpu.wait async [] // No async dependencies.
/// ... gpu.wait ... [%t, ...] // %t can be removed.		/// ... gpu.wait ... [%t, ...] // %t can be removed.
struct EraseRedundantGpuWaitOpPairs : public OpRewritePattern<WaitOp> {		struct EraseRedundantGpuWaitOpPairs : public OpRewritePattern<WaitOp> {
public:		public:
using OpRewritePattern::OpRewritePattern;		using OpRewritePattern::OpRewritePattern;

LogicalResult matchAndRewrite(WaitOp op,		LogicalResult matchAndRewrite(WaitOp op,
PatternRewriter &rewriter) const final {		PatternRewriter &rewriter) const final {
auto predicate = [](Value value) {		auto predicate = [](Value value) {
auto waitOp = value.getDefiningOp<WaitOp>();		auto waitOp = value.getDefiningOp<WaitOp>();
return waitOp && waitOp->getNumOperands() == 0;		return waitOp && waitOp.getAsyncDependencies().size() == 0;
};		};

if (llvm::none_of(op.getAsyncDependencies(), predicate))		if (llvm::none_of(op.getAsyncDependencies(), predicate))
return failure();		return failure();

SmallVector<Value> validOperands;		SmallVector<Value> validOperands;
for (Value operand : op->getOperands()) {		for (Value operand : op.getAsyncDependencies()) {
if (predicate(operand))		if (predicate(operand))
continue;		continue;
validOperands.push_back(operand);		validOperands.push_back(operand);
}		}
rewriter.updateRootInPlace(op, [&]() { op->setOperands(validOperands); });
		rewriter.updateRootInPlace(
		op, [&]() { op.getAsyncDependenciesMutable().assign(validOperands); });
return success();		return success();
}		}
};		};

/// Simplify trivial gpu.wait ops for the following patterns.		/// Simplify trivial gpu.wait ops for the following patterns.
/// 1. %t = gpu.wait async ... ops, where %t has no uses (regardless of async		/// 1. %t = gpu.wait async ... ops, where %t has no uses (regardless of async
/// dependencies).		/// dependencies).
/// 2. %t1 = gpu.wait async [%t0], in this case, we can replace uses of %t1 with		/// 2. %t1 = gpu.wait async [%t0], in this case, we can replace uses of %t1 with
▲ Show 20 Lines • Show All 87 Lines • ▼ Show 20 Lines

} // namespace		} // namespace

void AllocOp::getCanonicalizationPatterns(RewritePatternSet &results,		void AllocOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {		MLIRContext *context) {
results.add<SimplifyDimOfAllocOp>(context);		results.add<SimplifyDimOfAllocOp>(context);
}		}

		/// Builds a gpu.alloc op.
		///
		/// Operands are appended in the order `asyncDependencies`, `dynamicSizes`,
		/// `symbolOperands`, then `queue` (only when `queue` is non-null); the
		/// operand segment sizes property records the size of each group in that
		/// same order. The `hostShared` unit attribute is set only when
		/// `hostShared` is true. Result types are `memref`, followed by
		/// `asyncToken` when it is non-null.
		void AllocOp::build(OpBuilder &odsBuilder, OperationState &odsState,
		                    Type memref,
		                    /*optional*/ Type asyncToken, ValueRange asyncDependencies,
		                    ValueRange dynamicSizes, ValueRange symbolOperands,
		                    /*optional*/ bool hostShared,
		                    /*optional*/ Value queue) {
		  odsState.addOperands(asyncDependencies);
		  odsState.addOperands(dynamicSizes);
		  odsState.addOperands(symbolOperands);
		  if (queue)
		    odsState.addOperands(queue);

		  // Four operand groups; the trailing entry is 1 only if a queue was given.
		  odsState.getOrAddProperties<Properties>().operand_segment_sizes =
		      odsBuilder.getDenseI32ArrayAttr(
		          {static_cast<int32_t>(asyncDependencies.size()),
		           static_cast<int32_t>(dynamicSizes.size()),
		           static_cast<int32_t>(symbolOperands.size()), (queue ? 1 : 0)});

		  // Leave the property untouched when the flag is not requested.
		  if (hostShared)
		    odsState.getOrAddProperties<Properties>().hostShared =
		        odsBuilder.getUnitAttr();

		  odsState.addTypes(memref);
		  if (asyncToken)
		    odsState.addTypes(asyncToken);
		}

		//===----------------------------------------------------------------------===//
		// GPU_DeallocOp
		//===----------------------------------------------------------------------===//

		/// Builds a gpu.dealloc op.
		///
		/// Operands are appended in the order `asyncDependencies`, `memref`, then
		/// `queue` (only when `queue` is non-null); the operand segment sizes
		/// property records the size of each group in that same order. An
		/// `asyncToken` result type is added only when `asyncToken` is non-null.
		void DeallocOp::build(OpBuilder &odsBuilder, OperationState &odsState,
		                      /*optional*/ Type asyncToken,
		                      ValueRange asyncDependencies, Value memref,
		                      /*optional*/ Value queue) {
		  odsState.addOperands(asyncDependencies);
		  odsState.addOperands(memref);
		  if (queue)
		    odsState.addOperands(queue);

		  // Three operand groups: dependencies, the single memref, optional queue.
		  odsState.getOrAddProperties<Properties>().operand_segment_sizes =
		      odsBuilder.getDenseI32ArrayAttr(
		          {static_cast<int32_t>(asyncDependencies.size()), 1,
		           (queue ? 1 : 0)});

		  if (asyncToken)
		    odsState.addTypes(asyncToken);
		}

#include "mlir/Dialect/GPU/IR/GPUOpInterfaces.cpp.inc"		#include "mlir/Dialect/GPU/IR/GPUOpInterfaces.cpp.inc"
#include "mlir/Dialect/GPU/IR/GPUOpsEnums.cpp.inc"		#include "mlir/Dialect/GPU/IR/GPUOpsEnums.cpp.inc"

#define GET_ATTRDEF_CLASSES		#define GET_ATTRDEF_CLASSES
#include "mlir/Dialect/GPU/IR/GPUOpsAttributes.cpp.inc"		#include "mlir/Dialect/GPU/IR/GPUOpsAttributes.cpp.inc"

#define GET_OP_CLASSES		#define GET_OP_CLASSES
#include "mlir/Dialect/GPU/IR/GPUOps.cpp.inc"		#include "mlir/Dialect/GPU/IR/GPUOps.cpp.inc"

mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp

Show First 20 Lines • Show All 141 Lines • ▼ Show 20 Lines	static gpu::AllocOp genAllocMemRef(OpBuilder &builder, Location loc, Value mem,
auto memTp = MemRefType::get(shape, elemTp);		auto memTp = MemRefType::get(shape, elemTp);
SmallVector<Value> dynamicSizes;		SmallVector<Value> dynamicSizes;
for (unsigned r = 0, rank = tp.getRank(); r < rank; r++) {		for (unsigned r = 0, rank = tp.getRank(); r < rank; r++) {
if (shape[r] == ShapedType::kDynamic) {		if (shape[r] == ShapedType::kDynamic) {
Value dimOp = linalg::createOrFoldDimOp(builder, loc, mem, r);		Value dimOp = linalg::createOrFoldDimOp(builder, loc, mem, r);
dynamicSizes.push_back(dimOp);		dynamicSizes.push_back(dimOp);
}		}
}		}
return builder.create<gpu::AllocOp>(loc, TypeRange({memTp, token.getType()}),		return builder.create<gpu::AllocOp>(loc, memTp, token.getType(), token,
token, dynamicSizes, ValueRange());		dynamicSizes,
		/symbolOperands/ ValueRange());
}		}

// Allocates a void buffer on the device with given size.		// Allocates a void buffer on the device with given size.
static gpu::AllocOp genAllocBuffer(OpBuilder &builder, Location loc, Value size,		static gpu::AllocOp genAllocBuffer(OpBuilder &builder, Location loc, Value size,
Value token) {		Value token) {
const auto memTp =		const auto memTp =
MemRefType::get({ShapedType::kDynamic}, builder.getI8Type());		MemRefType::get({ShapedType::kDynamic}, builder.getI8Type());
return builder.create<gpu::AllocOp>(loc, TypeRange({memTp, token.getType()}),		return builder.create<gpu::AllocOp>(loc, memTp, token.getType(), token, size,
token, size, ValueRange());		/symbolOperands/ ValueRange());
}		}

/// Deallocates memory from the device.		/// Deallocates memory from the device.
static Value genDeallocMemRef(OpBuilder &builder, Location loc, Value mem,		static Value genDeallocMemRef(OpBuilder &builder, Location loc, Value mem,
Value token) {		Value token) {
return builder.create<gpu::DeallocOp>(loc, token.getType(), token, mem)		return builder.create<gpu::DeallocOp>(loc, token.getType(), token, mem)
.getAsyncToken();		.getAsyncToken();
}		}
▲ Show 20 Lines • Show All 783 Lines • Show Last 20 Lines

mlir/test/Dialect/GPU/ops.mlir

Show First 20 Lines • Show All 125 Lines • ▼ Show 20 Lines	module attributes {gpu.container_module} {
func.func @foo() {		func.func @foo() {
%0 = "op"() : () -> (f32)		%0 = "op"() : () -> (f32)
%1 = "op"() : () -> (memref<?xf32, 1>)		%1 = "op"() : () -> (memref<?xf32, 1>)
// CHECK: %{{.*}} = arith.constant 8		// CHECK: %{{.*}} = arith.constant 8
%cst = arith.constant 8 : index		%cst = arith.constant 8 : index
%c0 = arith.constant 0 : i32		%c0 = arith.constant 0 : i32
%t0 = gpu.wait async		%t0 = gpu.wait async

		// CHECK: %[[queue:.*]] = gpu.create_queue
		%queue = gpu.create_queue

// CHECK: gpu.launch_func @kernels::@kernel_1 blocks in (%{{.}}, %{{.}}, %{{.}}) threads in (%{{.}}, %{{.}}, %{{.}}) args(%{{.}} : f32, %{{.}} : memref<?xf32, 1>)		// CHECK: gpu.launch_func @kernels::@kernel_1 blocks in (%{{.}}, %{{.}}, %{{.}}) threads in (%{{.}}, %{{.}}, %{{.}}) args(%{{.}} : f32, %{{.}} : memref<?xf32, 1>)
gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) args(%0 : f32, %1 : memref<?xf32, 1>)		gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) args(%0 : f32, %1 : memref<?xf32, 1>)

		gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) queue %queue args(%0 : f32, %1 : memref<?xf32, 1>)

		gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) queue %queue dynamic_shared_memory_size %c0 args(%0 : f32, %1 : memref<?xf32, 1>)

gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) dynamic_shared_memory_size %c0 args(%0 : f32, %1 : memref<?xf32, 1>)		gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) dynamic_shared_memory_size %c0 args(%0 : f32, %1 : memref<?xf32, 1>)

// CHECK: gpu.launch_func @kernels::@kernel_2 blocks in (%{{.}}, %{{.}}, %{{.}}) threads in (%{{.}}, %{{.}}, %{{.}})		// CHECK: gpu.launch_func @kernels::@kernel_2 blocks in (%{{.}}, %{{.}}, %{{.}}) threads in (%{{.}}, %{{.}}, %{{.}})
gpu.launch_func @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)		gpu.launch_func @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)

// CHECK: %{{.}} = gpu.launch_func async [%{{.}}] @kernels::@kernel_2 blocks in (%{{.}}, %{{.}}, %{{.}}) threads in (%{{.}}, %{{.}}, %{{.}})		// CHECK: %{{.}} = gpu.launch_func async [%{{.}}] @kernels::@kernel_2 blocks in (%{{.}}, %{{.}}, %{{.}}) threads in (%{{.}}, %{{.}}, %{{.}})
%t1 = gpu.launch_func async [%t0] @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)		%t1 = gpu.launch_func async [%t0] @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)

// CHECK: %[[VALUES:.*]]:2 = call		// CHECK: %[[VALUES:.*]]:2 = call
%values:2 = func.call @two_value_generator() : () -> (f32, memref<?xf32, 1>)		%values:2 = func.call @two_value_generator() : () -> (f32, memref<?xf32, 1>)
// CHECK: gpu.launch_func @kernels::@kernel_1 {{.*}} args(%[[VALUES]]#0 : f32, %[[VALUES]]#1 : memref<?xf32, 1>)		// CHECK: gpu.launch_func @kernels::@kernel_1 {{.*}} args(%[[VALUES]]#0 : f32, %[[VALUES]]#1 : memref<?xf32, 1>)
gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) args(%values#0 : f32, %values#1 : memref<?xf32, 1>)		gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) args(%values#0 : f32, %values#1 : memref<?xf32, 1>)

		// CHECK: gpu.destroy_queue %[[queue]]
		gpu.destroy_queue %queue
return		return
}		}

gpu.module @gpu_funcs {		gpu.module @gpu_funcs {
// CHECK-LABEL: gpu.func @kernel_1({{.*}}: f32)		// CHECK-LABEL: gpu.func @kernel_1({{.*}}: f32)
// CHECK: workgroup		// CHECK: workgroup
// CHECK: private		// CHECK: private
// CHECK: attributes		// CHECK: attributes
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines	gpu.module @explicit_attributions {
^bb0(%arg0: f32, %arg1: memref<?xf32>, %arg2: memref<5xf32, 3>, %arg3: memref<5xf32, 5>):		^bb0(%arg0: f32, %arg1: memref<?xf32>, %arg2: memref<5xf32, 3>, %arg3: memref<5xf32, 5>):
"gpu.return"() : () -> ()		"gpu.return"() : () -> ()
} ) {function_type = (f32, memref<?xf32>) -> (), gpu.kernel, sym_name = "kernel_1", workgroup_attributions = 1: i64} : () -> ()		} ) {function_type = (f32, memref<?xf32>) -> (), gpu.kernel, sym_name = "kernel_1", workgroup_attributions = 1: i64} : () -> ()
}		}

func.func @alloc() {		func.func @alloc() {
// CHECK-LABEL: func @alloc()		// CHECK-LABEL: func @alloc()


// CHECK: %[[m0:.*]] = gpu.alloc () : memref<13xf32, 1>		// CHECK: %[[m0:.*]] = gpu.alloc () : memref<13xf32, 1>
%m0 = gpu.alloc () : memref<13xf32, 1>		%m0 = gpu.alloc () : memref<13xf32, 1>
// CHECK: gpu.dealloc %[[m0]] : memref<13xf32, 1>		// CHECK: gpu.dealloc %[[m0]] : memref<13xf32, 1>
gpu.dealloc %m0 : memref<13xf32, 1>		gpu.dealloc %m0 : memref<13xf32, 1>

%t0 = gpu.wait async		%t0 = gpu.wait async
// CHECK: %[[m1:.]], %[[t1:.]] = gpu.alloc async [{{.*}}] () : memref<13xf32, 1>		// CHECK: %[[m1:.]], %[[t1:.]] = gpu.alloc async [{{.*}}] () : memref<13xf32, 1>
%m1, %t1 = gpu.alloc async [%t0] () : memref<13xf32, 1>		%m1, %t1 = gpu.alloc async [%t0] () : memref<13xf32, 1>
// CHECK: gpu.dealloc async [%[[t1]]] %[[m1]] : memref<13xf32, 1>		// CHECK: gpu.dealloc async [%[[t1]]] %[[m1]] : memref<13xf32, 1>
%t2 = gpu.dealloc async [%t1] %m1 : memref<13xf32, 1>		%t2 = gpu.dealloc async [%t1] %m1 : memref<13xf32, 1>

// CHECK: %[[m2:.*]] = gpu.alloc host_shared () : memref<13xf32, 1>		// CHECK: %[[m2:.*]] = gpu.alloc host_shared () : memref<13xf32, 1>
%m2 = gpu.alloc host_shared () : memref<13xf32, 1>		%m2 = gpu.alloc host_shared () : memref<13xf32, 1>
// CHECK: gpu.dealloc %[[m2]] : memref<13xf32, 1>		// CHECK: gpu.dealloc %[[m2]] : memref<13xf32, 1>
gpu.dealloc %m2 : memref<13xf32, 1>		gpu.dealloc %m2 : memref<13xf32, 1>

		// CHECK: %[[queue:.*]] = gpu.create_queue
		%queue = gpu.create_queue
		// CHECK: %[[m3:.*]] = gpu.alloc () queue %[[queue]] : memref<13xf32, 1>
		%m3 = gpu.alloc () queue %queue : memref<13xf32, 1>
		// CHECK: gpu.dealloc %[[m3]] queue %[[queue]] : memref<13xf32, 1>
		gpu.dealloc %m3 queue %queue : memref<13xf32, 1>

return		return
}		}

func.func @async_token(%arg0 : !gpu.async.token) -> !gpu.async.token {		func.func @async_token(%arg0 : !gpu.async.token) -> !gpu.async.token {
// CHECK-LABEL: func @async_token({{.*}}: !gpu.async.token)		// CHECK-LABEL: func @async_token({{.*}}: !gpu.async.token)
// CHECK: return {{.*}} : !gpu.async.token		// CHECK: return {{.*}} : !gpu.async.token
return %arg0 : !gpu.async.token		return %arg0 : !gpu.async.token
}		}
Show All 18 Lines	module attributes {gpu.container_module} {
func.func @memcpy(%dst : memref<3x7xf32>, %src : memref<3x7xf32, 1>) {		func.func @memcpy(%dst : memref<3x7xf32>, %src : memref<3x7xf32, 1>) {
// CHECK-LABEL: func @memcpy		// CHECK-LABEL: func @memcpy
// CHECK: gpu.memcpy {{.}}, {{.}} : memref<3x7xf32>, memref<3x7xf32, 1>		// CHECK: gpu.memcpy {{.}}, {{.}} : memref<3x7xf32>, memref<3x7xf32, 1>
gpu.memcpy %dst, %src : memref<3x7xf32>, memref<3x7xf32, 1>		gpu.memcpy %dst, %src : memref<3x7xf32>, memref<3x7xf32, 1>
// CHECK: %[[t0:.*]] = gpu.wait async		// CHECK: %[[t0:.*]] = gpu.wait async
%0 = gpu.wait async		%0 = gpu.wait async
// CHECK: {{.}} = gpu.memcpy async [%[[t0]]] {{.}}, {{.*}} : memref<3x7xf32>, memref<3x7xf32, 1>		// CHECK: {{.}} = gpu.memcpy async [%[[t0]]] {{.}}, {{.*}} : memref<3x7xf32>, memref<3x7xf32, 1>
%1 = gpu.memcpy async [%0] %dst, %src : memref<3x7xf32>, memref<3x7xf32, 1>		%1 = gpu.memcpy async [%0] %dst, %src : memref<3x7xf32>, memref<3x7xf32, 1>

		// CHECK: %[[queue:.*]] = gpu.create_queue
		%queue = gpu.create_queue
		// CHECK: gpu.memcpy {{.}}, {{.}} queue %[[queue]] : memref<3x7xf32>, memref<3x7xf32, 1>
		gpu.memcpy %dst, %src queue %queue : memref<3x7xf32>, memref<3x7xf32, 1>
		// CHECK: %[[t1:.*]] = gpu.wait async queue %[[queue]]
		%2 = gpu.wait async queue %queue
		// CHECK: {{.}} = gpu.memcpy async [%[[t1]]] {{.}}, {{.*}} queue %[[queue]] : memref<3x7xf32>, memref<3x7xf32, 1>
		%3 = gpu.memcpy async [%2] %dst, %src queue %queue : memref<3x7xf32>, memref<3x7xf32, 1>

return		return
}		}

func.func @memset(%dst : memref<3x7xf32>, %value : f32) {		func.func @memset(%dst : memref<3x7xf32>, %value : f32) {
// CHECK-LABEL: func @memset		// CHECK-LABEL: func @memset
// CHECK: gpu.memset {{.}}, {{.}} : memref<3x7xf32>, f32		// CHECK: gpu.memset {{.}}, {{.}} : memref<3x7xf32>, f32
gpu.memset %dst, %value : memref<3x7xf32>, f32		gpu.memset %dst, %value : memref<3x7xf32>, f32
// CHECK: %[[t0:.*]] = gpu.wait async		// CHECK: %[[t0:.*]] = gpu.wait async
%0 = gpu.wait async		%0 = gpu.wait async
// CHECK: {{.}} = gpu.memset async [%[[t0]]] {{.}}, {{.*}} : memref<3x7xf32>, f32		// CHECK: {{.}} = gpu.memset async [%[[t0]]] {{.}}, {{.*}} : memref<3x7xf32>, f32
%1 = gpu.memset async [%0] %dst, %value : memref<3x7xf32>, f32		%1 = gpu.memset async [%0] %dst, %value : memref<3x7xf32>, f32

		// CHECK: %[[queue:.*]] = gpu.create_queue
		%queue = gpu.create_queue
		// CHECK: gpu.memset {{.}}, {{.}} queue %[[queue]] : memref<3x7xf32>, f32
		gpu.memset %dst, %value queue %queue : memref<3x7xf32>, f32
		// CHECK: %[[t1:.*]] = gpu.wait async queue %[[queue]]
		%2 = gpu.wait async queue %queue
		// CHECK: {{.}} = gpu.memset async [%[[t1]]] {{.}}, {{.*}} queue %[[queue]] : memref<3x7xf32>, f32
		%3 = gpu.memset async [%2] %dst, %value queue %queue : memref<3x7xf32>, f32

return		return
}		}

func.func @mmamatrix_valid_scalar_element_type(%src : memref<32x32xf16, affine_map<(d0, d1) -> (d0 * 64 + d1)>>){		func.func @mmamatrix_valid_scalar_element_type(%src : memref<32x32xf16, affine_map<(d0, d1) -> (d0 * 64 + d1)>>){
// CHECK-LABEL: func @mmamatrix_valid_scalar_element_type		// CHECK-LABEL: func @mmamatrix_valid_scalar_element_type
%wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>		%wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
// CHECK: %[[wg:.*]] = memref.alloca()		// CHECK: %[[wg:.*]] = memref.alloca()
%i = arith.constant 16 : index		%i = arith.constant 16 : index
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines	func.func @sparse_ops(%arg0: index) {
// CHECK: gpu.destroy_sp_mat async		// CHECK: gpu.destroy_sp_mat async
%token15 = gpu.destroy_sp_mat async [%token14] %spmat		%token15 = gpu.destroy_sp_mat async [%token14] %spmat
// CHECK: gpu.destroy_dn_tensor async		// CHECK: gpu.destroy_dn_tensor async
%token16 = gpu.destroy_dn_tensor async [%token15] %dnvec		%token16 = gpu.destroy_dn_tensor async [%token15] %dnvec
// CHECK: gpu.wait		// CHECK: gpu.wait
gpu.wait [%token16]		gpu.wait [%token16]
return		return
}		}

		// CHECK-LABEL: func @create_queue_with_desc
		func.func @create_queue_with_desc() {
		// CHECK: gpu.create_queue "test"
		%queue = gpu.create_queue "test"
		return
		}
}		}

// Just check that this doesn't crash.		// Just check that this doesn't crash.
gpu.module @module {		gpu.module @module {
"gpu.func"() ({		"gpu.func"() ({
gpu.return		gpu.return
}) {function_type = () -> (), sym_name = "func"} : () -> ()		}) {function_type = () -> (), sym_name = "func"} : () -> ()
}		}

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][GPU] Add gpu.create_queue op and add queue as an optional argument to gpu ops
Needs Review · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 538461

mlir/include/mlir/Dialect/GPU/IR/GPUBase.td

mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp

mlir/test/Dialect/GPU/ops.mlir

This is an archive of the discontinued LLVM Phabricator instance.

[mlir][GPU] Add gpu.create_queue op and add queue as an optional argument to gpu ops

Needs Review · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 538461

mlir/include/mlir/Dialect/GPU/IR/GPUBase.td

mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp

mlir/test/Dialect/GPU/ops.mlir

[mlir][GPU] Add gpu.create_queue op and add queue as an optional argument to gpu ops
Needs Review · Public