This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
mlir/
-
include/mlir/Dialect/GPU/IR/
-
mlir/
-
Dialect/
-
GPU/
-
IR/
-
GPUOps.td
-
lib/
-
Conversion/GPUCommon/
-
GPUCommon/
-
GPUOpsLowering.cpp
-
Dialect/GPU/IR/
-
GPU/
-
IR/
-
GPUDialect.cpp
-
test/Conversion/GPUCommon/
-
Conversion/
-
GPUCommon/
-
memory-attrbution.mlir

Differential D148965

[mlir][GPU] Allow specifying alignment of memory attributions
ClosedPublic

Authored by krzysz00 on Apr 21 2023, 2:48 PM.

Download Raw Diff

Details

Reviewers

bondhugula
ThomasRaoux
nicolasvasilache
herhut
nirvedhmeshram

Commits

rG94058c41d43a: [mlir][GPU] Allow specifying alignment of memory attributions

Summary

Add support for argument attributes on workgroup and private
attributions for GPU functions. These arguments are outside the range
of getNumArguments() and get printed separately, so the default
mechanism for function argument attributes can't be used on them.

Having done this, check for the llvm.align attribute on workgroup or
private attributions in a gpu.func and pass it through to the
relevant allocation op (creating a global or alloca). This allows
people creating kernels that use multiple workgroup buffers to set an
alignment.

(This could, in the future, be a GPU dialect alignment attribute,
but I've taken the simpler route of using the LLVM version instead
because I don't know how this might impact backends
like Vulkan.)

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

krzysz00 created this revision.Apr 21 2023, 2:48 PM

Herald added a reviewer: bondhugula. · View Herald TranscriptApr 21 2023, 2:48 PM

Herald added a reviewer: ThomasRaoux. · View Herald Transcript

Herald added a project: Restricted Project. · View Herald Transcript

Herald added subscribers: bviyer, Moerafaat, zero9178 and 23 others. · View Herald Transcript

krzysz00 requested review of this revision.Apr 21 2023, 2:48 PM

Herald added a reviewer: nicolasvasilache. · View Herald TranscriptApr 21 2023, 2:48 PM

Herald added a reviewer: herhut. · View Herald Transcript

Herald added a project: Restricted Project. · View Herald Transcript

Herald added subscribers: stephenneuendorffer, nicolasvasilache. · View Herald Transcript

Harbormaster completed remote builds in B227334: Diff 515925.Apr 21 2023, 3:07 PM

nirvedhmeshram accepted this revision.May 3 2023, 10:44 AM

This revision is now accepted and ready to land.May 3 2023, 10:44 AM

Rebase

This revision was landed with ongoing or failed builds.May 3 2023, 2:51 PM

Closed by commit rG94058c41d43a: [mlir][GPU] Allow specifying alignment of memory attributions (authored by krzysz00). · Explain Why

This revision was automatically updated to reflect the committed changes.

krzysz00 added a commit: rG94058c41d43a: [mlir][GPU] Allow specifying alignment of memory attributions.

Harbormaster completed remote builds in B229825: Diff 519263.May 3 2023, 3:41 PM

Revision Contents

Path

Size

mlir/

include/

mlir/

Dialect/

GPU/

IR/

GPUOps.td

70 lines

lib/

Conversion/

GPUCommon/

GPUOpsLowering.cpp

28 lines

Dialect/

GPU/

IR/

GPUDialect.cpp

206 lines

test/

Conversion/

GPUCommon/

memory-attrbution.mlir

31 lines

Diff 519266

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Show First 20 Lines • Show All 246 Lines • ▼ Show 20 Lines	let description = [{
```		```

Note the non-default memory spaces used in memref types in memory		Note the non-default memory spaces used in memref types in memory
attribution.		attribution.
}];		}];

let arguments = (ins TypeAttrOf<FunctionType>:$function_type,		let arguments = (ins TypeAttrOf<FunctionType>:$function_type,
OptionalAttr<DictArrayAttr>:$arg_attrs,		OptionalAttr<DictArrayAttr>:$arg_attrs,
OptionalAttr<DictArrayAttr>:$res_attrs);		OptionalAttr<DictArrayAttr>:$res_attrs,
		OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
		OptionalAttr<DictArrayAttr>:$private_attrib_attrs);
let regions = (region AnyRegion:$body);		let regions = (region AnyRegion:$body);

let skipDefaultBuilders = 1;		let skipDefaultBuilders = 1;

let builders = [		let builders = [
OpBuilder<(ins "StringRef":$name, "FunctionType":$type,		OpBuilder<(ins "StringRef":$name, "FunctionType":$type,
CArg<"TypeRange", "{}">:$workgroupAttributions,		CArg<"TypeRange", "{}">:$workgroupAttributions,
CArg<"TypeRange", "{}">:$privateAttributions,		CArg<"TypeRange", "{}">:$privateAttributions,
Show All 10 Lines	let extraClassDeclaration = [{

/// Returns the number of buffers located in the workgroup memory.		/// Returns the number of buffers located in the workgroup memory.
unsigned getNumWorkgroupAttributions() {		unsigned getNumWorkgroupAttributions() {
auto attr = (*this)->getAttrOfType<IntegerAttr>(		auto attr = (*this)->getAttrOfType<IntegerAttr>(
getNumWorkgroupAttributionsAttrName());		getNumWorkgroupAttributionsAttrName());
return attr ? attr.getInt() : 0;		return attr ? attr.getInt() : 0;
}		}

		/// Return the index of the first workgroup attribution in the block argument
		/// list.
		unsigned getFirstWorkgroupAttributionIndex() {
		return getFunctionType().getNumInputs();
		}

/// Returns a list of block arguments that correspond to buffers located in		/// Returns a list of block arguments that correspond to buffers located in
/// the workgroup memory		/// the workgroup memory
ArrayRef<BlockArgument> getWorkgroupAttributions() {		ArrayRef<BlockArgument> getWorkgroupAttributions() {
auto begin =		auto begin =
std::next(getBody().args_begin(), getFunctionType().getNumInputs());		std::next(getBody().args_begin(), getFirstWorkgroupAttributionIndex());
auto end = std::next(begin, getNumWorkgroupAttributions());		auto end = std::next(begin, getNumWorkgroupAttributions());
return {begin, end};		return {begin, end};
}		}

/// Adds a new block argument that corresponds to buffers located in		/// Adds a new block argument that corresponds to buffers located in
/// workgroup memory.		/// workgroup memory.
BlockArgument addWorkgroupAttribution(Type type, Location loc);		BlockArgument addWorkgroupAttribution(Type type, Location loc);

		/// Get the workgroup attribution attribute dictionary for the attribution
		/// at index `index`, counted from the start of the workgroup attributions.
		DictionaryAttr getworkgroupAttributionAttrs(unsigned index);

		/// Set the workgroup attribution attribute dictionary for the attribution
		/// at index `index`, counted from the start of the workgroup attributions.
		void setworkgroupAttributionAttrs(unsigned index, DictionaryAttr value);

		/// Get an attribute for a workgroup attribution. `index` is counted
		/// from the start of the workgroup attributions, not the start of the block.
		Attribute getWorkgroupAttributionAttr(unsigned index, StringAttr name);
		Attribute getWorkgroupAttributionAttr(unsigned index, StringRef name) {
		return getWorkgroupAttributionAttr(index, StringAttr::get((*this)->getContext(), name));
		}

		/// Set an attribute for a workgroup attribution. `index` is counted
		/// from the start of the workgroup attributions, not the start of the block.
		/// A null `value` removes an attribution attribute.
		void setWorkgroupAttributionAttr(unsigned index, StringAttr name, Attribute value);
		void setWorkgroupAttributionAttr(unsigned index, StringRef name, Attribute value) {
		return setWorkgroupAttributionAttr(index, StringAttr::get((*this)->getContext(), name), value);
		}

/// Returns the number of buffers located in the private memory.		/// Returns the number of buffers located in the private memory.
unsigned getNumPrivateAttributions() {		unsigned getNumPrivateAttributions() {
return getBody().getNumArguments() - getFunctionType().getNumInputs() -		return getBody().getNumArguments() - getFunctionType().getNumInputs() -
getNumWorkgroupAttributions();		getNumWorkgroupAttributions();
}		}

		/// Returns the index of the first private buffer in the block argument list.
		unsigned getFirstPrivateAttributionIndex() {
		// Buffers on the private memory always come after buffers on the workgroup
		// memory.
		return getFunctionType().getNumInputs() + getNumWorkgroupAttributions();
		}

/// Returns a list of block arguments that correspond to buffers located in		/// Returns a list of block arguments that correspond to buffers located in
/// the private memory.		/// the private memory.
ArrayRef<BlockArgument> getPrivateAttributions() {		ArrayRef<BlockArgument> getPrivateAttributions() {
// Buffers on the private memory always come after buffers on the workgroup
// memory.
auto begin =		auto begin =
std::next(getBody().args_begin(),		std::next(getBody().args_begin(), getFirstPrivateAttributionIndex());
getFunctionType().getNumInputs() + getNumWorkgroupAttributions());
return {begin, getBody().args_end()};		return {begin, getBody().args_end()};
}		}

/// Adds a new block argument that corresponds to buffers located in		/// Adds a new block argument that corresponds to buffers located in
/// private memory.		/// private memory.
BlockArgument addPrivateAttribution(Type type, Location loc);		BlockArgument addPrivateAttribution(Type type, Location loc);

		/// Get the private attribution attribute dictionary for the attribution
		/// at index `index`, counted from the start of the private attributions.
		DictionaryAttr getPrivateAttributionAttrs(unsigned index);

		/// Set the private attribution attribute dictionary for the attribution
		/// at index `index`, counted from the start of the private attributions.
		void setPrivateAttributionAttrs(unsigned index, DictionaryAttr value);

		/// Get an attribute for a private attribution. `index` is counted
		/// from the start of the private attributions, not the start of the block.
		Attribute getPrivateAttributionAttr(unsigned index, StringAttr name);
		Attribute getPrivateAttributionAttr(unsigned index, StringRef name) {
		return getPrivateAttributionAttr(index, StringAttr::get((*this)->getContext(), name));
		}

		/// Set an attribute for a private attribution. `index` is counted
		/// from the start of the private attributions, not the start of the block.
		/// A null `value` removes an attribute.
		void setPrivateAttributionAttr(unsigned index, StringAttr name, Attribute value);
		void setPrivateAttributionAttr(unsigned index, StringRef name, Attribute value) {
		return setPrivateAttributionAttr(index, StringAttr::get((*this)->getContext(), name), value);
		}

/// Returns the name of the attribute containing the number of buffers		/// Returns the name of the attribute containing the number of buffers
/// located in the workgroup memory.		/// located in the workgroup memory.
static StringRef getNumWorkgroupAttributionsAttrName() {		static StringRef getNumWorkgroupAttributionsAttrName() {
return "workgroup_attributions";		return "workgroup_attributions";
}		}

static constexpr StringLiteral getKnownBlockSizeAttrName() {		static constexpr StringLiteral getKnownBlockSizeAttrName() {
return StringLiteral("gpu.known_block_size");		return StringLiteral("gpu.known_block_size");
▲ Show 20 Lines • Show All 1,151 Lines • Show Last 20 Lines

mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp

Show All 18 Lines
LogicalResult		LogicalResult
GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,		GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {		ConversionPatternRewriter &rewriter) const {
Location loc = gpuFuncOp.getLoc();		Location loc = gpuFuncOp.getLoc();

SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;		SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());		workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
for (const auto &en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {		for (const auto &en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
Value attribution = en.value();		BlockArgument attribution = en.value();

auto type = attribution.getType().dyn_cast<MemRefType>();		auto type = attribution.getType().dyn_cast<MemRefType>();
assert(type && type.hasStaticShape() && "unexpected type in attribution");		assert(type && type.hasStaticShape() && "unexpected type in attribution");

uint64_t numElements = type.getNumElements();		uint64_t numElements = type.getNumElements();

auto elementType =		auto elementType =
typeConverter->convertType(type.getElementType()).template cast<Type>();		typeConverter->convertType(type.getElementType()).template cast<Type>();
auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);		auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
std::string name = std::string(		std::string name = std::string(
llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index()));		llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index()));
		uint64_t alignment = 0;
		if (auto alignAttr =
		gpuFuncOp
		.getWorkgroupAttributionAttr(
		en.index(), LLVM::LLVMDialect::getAlignAttrName())
		.dyn_cast_or_null<IntegerAttr>())
		alignment = alignAttr.getInt();
auto globalOp = rewriter.create<LLVM::GlobalOp>(		auto globalOp = rewriter.create<LLVM::GlobalOp>(
gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,		gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
LLVM::Linkage::Internal, name, /*value=*/Attribute(),		LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
/*alignment=*/0, workgroupAddrSpace);		workgroupAddrSpace);
workgroupBuffers.push_back(globalOp);		workgroupBuffers.push_back(globalOp);
}		}

// Remap proper input types.		// Remap proper input types.
TypeConverter::SignatureConversion signatureConversion(		TypeConverter::SignatureConversion signatureConversion(
gpuFuncOp.front().getNumArguments());		gpuFuncOp.front().getNumArguments());
Type funcType = getTypeConverter()->convertFunctionSignature(		Type funcType = getTypeConverter()->convertFunctionSignature(
gpuFuncOp.getFunctionType(), /*isVariadic=*/false,		gpuFuncOp.getFunctionType(), /*isVariadic=*/false,
getTypeConverter()->getOptions().useBarePtrCallConv, signatureConversion);		getTypeConverter()->getOptions().useBarePtrCallConv, signatureConversion);

// Create the new function operation. Only copy those attributes that are		// Create the new function operation. Only copy those attributes that are
// not specific to function modeling.		// not specific to function modeling.
SmallVector<NamedAttribute, 4> attributes;		SmallVector<NamedAttribute, 4> attributes;
for (const auto &attr : gpuFuncOp->getAttrs()) {		for (const auto &attr : gpuFuncOp->getAttrs()) {
if (attr.getName() == SymbolTable::getSymbolAttrName() ||		if (attr.getName() == SymbolTable::getSymbolAttrName() ||
attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||		attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
attr.getName() == gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName())		attr.getName() ==
		gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
		attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
		attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName())
continue;		continue;
attributes.push_back(attr);		attributes.push_back(attr);
}		}
// Add a dialect specific kernel attribute in addition to GPU kernel		// Add a dialect specific kernel attribute in addition to GPU kernel
// attribute. The former is necessary for further translation while the		// attribute. The former is necessary for further translation while the
// latter is expected by gpu.launch_func.		// latter is expected by gpu.launch_func.
if (gpuFuncOp.isKernel())		if (gpuFuncOp.isKernel())
attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());		attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	for (const auto &en : llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
// Explicitly drop memory space when lowering private memory		// Explicitly drop memory space when lowering private memory
// attributions since NVVM models it as `alloca`s in the default		// attributions since NVVM models it as `alloca`s in the default
// memory space and does not support `alloca`s with addrspace(5).		// memory space and does not support `alloca`s with addrspace(5).
Type elementType = typeConverter->convertType(type.getElementType());		Type elementType = typeConverter->convertType(type.getElementType());
auto ptrType =		auto ptrType =
getTypeConverter()->getPointerType(elementType, allocaAddrSpace);		getTypeConverter()->getPointerType(elementType, allocaAddrSpace);
Value numElements = rewriter.create<LLVM::ConstantOp>(		Value numElements = rewriter.create<LLVM::ConstantOp>(
gpuFuncOp.getLoc(), int64Ty, type.getNumElements());		gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
		uint64_t alignment = 0;
		if (auto alignAttr =
		gpuFuncOp
		.getPrivateAttributionAttr(
		en.index(), LLVM::LLVMDialect::getAlignAttrName())
		.dyn_cast_or_null<IntegerAttr>())
		alignment = alignAttr.getInt();
Value allocated = rewriter.create<LLVM::AllocaOp>(		Value allocated = rewriter.create<LLVM::AllocaOp>(
gpuFuncOp.getLoc(), ptrType, elementType, numElements,		gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment);
/*alignment=*/0);
auto descr = MemRefDescriptor::fromStaticShape(		auto descr = MemRefDescriptor::fromStaticShape(
rewriter, loc, *getTypeConverter(), type, allocated);		rewriter, loc, *getTypeConverter(), type, allocated);
signatureConversion.remapInput(		signatureConversion.remapInput(
numProperArguments + numWorkgroupAttributions + en.index(), descr);		numProperArguments + numWorkgroupAttributions + en.index(), descr);
}		}
}		}

// Move the region to the new function, update the entry block signature.		// Move the region to the new function, update the entry block signature.
▲ Show 20 Lines • Show All 361 Lines • Show Last 20 Lines

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

Show First 20 Lines • Show All 1,011 Lines • ▼ Show 20 Lines	void GPUFuncOp::build(OpBuilder &builder, OperationState &result,
for (Type argTy : workgroupAttributions)		for (Type argTy : workgroupAttributions)
entryBlock->addArgument(argTy, result.location);		entryBlock->addArgument(argTy, result.location);
for (Type argTy : privateAttributions)		for (Type argTy : privateAttributions)
entryBlock->addArgument(argTy, result.location);		entryBlock->addArgument(argTy, result.location);

body->getBlocks().push_back(entryBlock);		body->getBlocks().push_back(entryBlock);
}		}

		/// Parses a GPU function memory attribution.
		///
		/// memory-attribution ::= (`workgroup` `(` ssa-id-and-type-list `)`)?
		/// (`private` `(` ssa-id-and-type-list `)`)?
		///
		/// Note that this function parses only one of the two similar parts, with the
		/// keyword provided as argument.
		static ParseResult
		parseAttributions(OpAsmParser &parser, StringRef keyword,
		SmallVectorImpl<OpAsmParser::Argument> &args,
		Attribute &attributionAttrs) {
		// If we could not parse the keyword, just assume empty list and succeed.
		if (failed(parser.parseOptionalKeyword(keyword)))
		return success();

		size_t existingArgs = args.size();
		ParseResult result =
		parser.parseArgumentList(args, OpAsmParser::Delimiter::Paren,
		/*allowType=*/true, /*allowAttrs=*/true);
		if (failed(result))
		return result;

		bool hadAttrs = llvm::any_of(ArrayRef(args).drop_front(existingArgs),
		[](const OpAsmParser::Argument &arg) -> bool {
		return arg.attrs && !arg.attrs.empty();
		});
		if (!hadAttrs) {
		attributionAttrs = nullptr;
		return result;
		}

		Builder &builder = parser.getBuilder();
		SmallVector<Attribute> attributionAttrsVec;
		for (const auto &argument : ArrayRef(args).drop_front(existingArgs)) {
		if (!argument.attrs)
		attributionAttrsVec.push_back(builder.getDictionaryAttr({}));
		else
		attributionAttrsVec.push_back(argument.attrs);
		}
		attributionAttrs = builder.getArrayAttr(attributionAttrsVec);
		return result;
		}

/// Parses a GPU function.		/// Parses a GPU function.
///		///
/// <operation> ::= `gpu.func` symbol-ref-id `(` argument-list `)`		/// <operation> ::= `gpu.func` symbol-ref-id `(` argument-list `)`
/// (`->` function-result-list)? memory-attribution `kernel`?		/// (`->` function-result-list)? memory-attribution `kernel`?
/// function-attributes? region		/// function-attributes? region
ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) {		ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) {
SmallVector<OpAsmParser::Argument> entryArgs;		SmallVector<OpAsmParser::Argument> entryArgs;
SmallVector<DictionaryAttr> resultAttrs;		SmallVector<DictionaryAttr> resultAttrs;
Show All 26 Lines	ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) {
auto type = builder.getFunctionType(argTypes, resultTypes);		auto type = builder.getFunctionType(argTypes, resultTypes);
result.addAttribute(getFunctionTypeAttrName(result.name),		result.addAttribute(getFunctionTypeAttrName(result.name),
TypeAttr::get(type));		TypeAttr::get(type));

function_interface_impl::addArgAndResultAttrs(		function_interface_impl::addArgAndResultAttrs(
builder, result, entryArgs, resultAttrs, getArgAttrsAttrName(result.name),		builder, result, entryArgs, resultAttrs, getArgAttrsAttrName(result.name),
getResAttrsAttrName(result.name));		getResAttrsAttrName(result.name));

		Attribute workgroupAttributionAttrs;
// Parse workgroup memory attributions.		// Parse workgroup memory attributions.
if (failed(parseAttributions(parser, GPUFuncOp::getWorkgroupKeyword(),		if (failed(parseAttributions(parser, GPUFuncOp::getWorkgroupKeyword(),
entryArgs)))		entryArgs, workgroupAttributionAttrs)))
return failure();		return failure();

// Store the number of operands we just parsed as the number of workgroup		// Store the number of operands we just parsed as the number of workgroup
// memory attributions.		// memory attributions.
unsigned numWorkgroupAttrs = entryArgs.size() - type.getNumInputs();		unsigned numWorkgroupAttrs = entryArgs.size() - type.getNumInputs();
result.addAttribute(GPUFuncOp::getNumWorkgroupAttributionsAttrName(),		result.addAttribute(GPUFuncOp::getNumWorkgroupAttributionsAttrName(),
builder.getI64IntegerAttr(numWorkgroupAttrs));		builder.getI64IntegerAttr(numWorkgroupAttrs));
		if (workgroupAttributionAttrs)
		result.addAttribute(GPUFuncOp::getWorkgroupAttribAttrsAttrName(result.name),
		workgroupAttributionAttrs);

		Attribute privateAttributionAttrs;
// Parse private memory attributions.		// Parse private memory attributions.
if (failed(		if (failed(parseAttributions(parser, GPUFuncOp::getPrivateKeyword(),
parseAttributions(parser, GPUFuncOp::getPrivateKeyword(), entryArgs)))		entryArgs, privateAttributionAttrs)))
return failure();		return failure();
		if (privateAttributionAttrs)
		result.addAttribute(GPUFuncOp::getPrivateAttribAttrsAttrName(result.name),
		privateAttributionAttrs);

// Parse the kernel attribute if present.		// Parse the kernel attribute if present.
if (succeeded(parser.parseOptionalKeyword(GPUFuncOp::getKernelKeyword())))		if (succeeded(parser.parseOptionalKeyword(GPUFuncOp::getKernelKeyword())))
result.addAttribute(GPUDialect::getKernelFuncAttrName(),		result.addAttribute(GPUDialect::getKernelFuncAttrName(),
builder.getUnitAttr());		builder.getUnitAttr());

// Parse attributes.		// Parse attributes.
if (failed(parser.parseOptionalAttrDictWithKeyword(result.attributes)))		if (failed(parser.parseOptionalAttrDictWithKeyword(result.attributes)))
return failure();		return failure();

// Parse the region. If no argument names were provided, take all names		// Parse the region. If no argument names were provided, take all names
// (including those of attributions) from the entry block.		// (including those of attributions) from the entry block.
auto *body = result.addRegion();		auto *body = result.addRegion();
return parser.parseRegion(*body, entryArgs);		return parser.parseRegion(*body, entryArgs);
}		}

		static void printAttributions(OpAsmPrinter &p, StringRef keyword,
		ArrayRef<BlockArgument> values,
		ArrayAttr attributes) {
		if (values.empty())
		return;

		p << ' ' << keyword << '(';
		llvm::interleaveComma(
		llvm::enumerate(values), p, [&p, attributes](auto pair) {
		BlockArgument v = pair.value();
		p << v << " : " << v.getType();

		size_t attributionIndex = pair.index();
		DictionaryAttr attrs;
		if (attributes && attributionIndex < attributes.size())
		attrs = attributes[attributionIndex].cast<DictionaryAttr>();
		if (attrs)
		p.printOptionalAttrDict(attrs.getValue());
		});
		p << ')';
		}

void GPUFuncOp::print(OpAsmPrinter &p) {		void GPUFuncOp::print(OpAsmPrinter &p) {
p << ' ';		p << ' ';
p.printSymbolName(getName());		p.printSymbolName(getName());

FunctionType type = getFunctionType();		FunctionType type = getFunctionType();
function_interface_impl::printFunctionSignature(p, *this, type.getInputs(),		function_interface_impl::printFunctionSignature(p, *this, type.getInputs(),
/*isVariadic=*/false,		/*isVariadic=*/false,
type.getResults());		type.getResults());

printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());		printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions(),
printAttributions(p, getPrivateKeyword(), getPrivateAttributions());		getWorkgroupAttribAttrs().value_or(nullptr));
		printAttributions(p, getPrivateKeyword(), getPrivateAttributions(),
		getPrivateAttribAttrs().value_or(nullptr));
if (isKernel())		if (isKernel())
p << ' ' << getKernelKeyword();		p << ' ' << getKernelKeyword();

function_interface_impl::printFunctionAttributes(		function_interface_impl::printFunctionAttributes(
p, *this,		p, *this,
{getNumWorkgroupAttributionsAttrName(),		{getNumWorkgroupAttributionsAttrName(),
GPUDialect::getKernelFuncAttrName(), getFunctionTypeAttrName(),		GPUDialect::getKernelFuncAttrName(), getFunctionTypeAttrName(),
getArgAttrsAttrName(), getResAttrsAttrName()});		getArgAttrsAttrName(), getResAttrsAttrName(),
		getWorkgroupAttribAttrsAttrName(), getPrivateAttribAttrsAttrName()});
p << ' ';		p << ' ';
p.printRegion(getBody(), /*printEntryBlockArgs=*/false);		p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
}		}

		static DictionaryAttr getAttributionAttrs(GPUFuncOp op, unsigned index,
		StringAttr attrName) {
		auto allAttrs = op->getAttr(attrName).dyn_cast_or_null<ArrayAttr>();
		if (!allAttrs || index >= allAttrs.size())
		return DictionaryAttr();
		return allAttrs[index].cast<DictionaryAttr>();
		}

		DictionaryAttr GPUFuncOp::getworkgroupAttributionAttrs(unsigned index) {
		return getAttributionAttrs(*this, index, getWorkgroupAttribAttrsAttrName());
		}

		DictionaryAttr GPUFuncOp::getPrivateAttributionAttrs(unsigned index) {
		return getAttributionAttrs(*this, index, getPrivateAttribAttrsAttrName());
		}

		static void setAttributionAttrs(GPUFuncOp op, unsigned index,
		DictionaryAttr value, StringAttr attrName) {
		MLIRContext *ctx = op.getContext();
		auto allAttrs = op->getAttr(attrName).dyn_cast_or_null<ArrayAttr>();
		SmallVector<Attribute> elements;
		if (allAttrs)
		elements.append(allAttrs.begin(), allAttrs.end());
		while (elements.size() <= index)
		elements.push_back(DictionaryAttr::get(ctx));
		if (!value)
		elements[index] = DictionaryAttr::get(ctx);
		else
		elements[index] = value;
		ArrayAttr newValue = ArrayAttr::get(ctx, elements);
		op->setAttr(attrName, newValue);
		}

		void GPUFuncOp::setworkgroupAttributionAttrs(unsigned index,
		DictionaryAttr value) {
		setAttributionAttrs(*this, index, value, getWorkgroupAttribAttrsAttrName());
		}

		void GPUFuncOp::setPrivateAttributionAttrs(unsigned int index,
		DictionaryAttr value) {
		setAttributionAttrs(*this, index, value, getPrivateAttribAttrsAttrName());
		}

		static Attribute getAttributionAttr(GPUFuncOp op, unsigned index,
		StringAttr name, StringAttr attrsName) {
		DictionaryAttr dict = getAttributionAttrs(op, index, attrsName);
		if (!dict)
		return Attribute();
		return dict.get(name);
		}

		Attribute GPUFuncOp::getWorkgroupAttributionAttr(unsigned index,
		StringAttr name) {
		assert(index < getNumWorkgroupAttributions() &&
		"index must map to a workgroup attribution");
		return getAttributionAttr(*this, index, name,
		getWorkgroupAttribAttrsAttrName());
		}

		Attribute GPUFuncOp::getPrivateAttributionAttr(unsigned index,
		StringAttr name) {
		assert(index < getNumPrivateAttributions() &&
		"index must map to a private attribution");
		return getAttributionAttr(*this, index, name,
		getPrivateAttribAttrsAttrName());
		}

		static void setAttributionAttr(GPUFuncOp op, unsigned index, StringAttr name,
		Attribute value, StringAttr attrsName) {
		MLIRContext *ctx = op.getContext();
		SmallVector<NamedAttribute> elems;
		DictionaryAttr oldDict = getAttributionAttrs(op, index, attrsName);
		if (oldDict)
		elems.append(oldDict.getValue().begin(), oldDict.getValue().end());

		bool found = false;
		bool mustSort = true;
		for (unsigned i = 0, e = elems.size(); i < e; ++i) {
		if (elems[i].getName() == name) {
		found = true;
		if (!value) {
		std::swap(elems[i], elems[elems.size() - 1]);
		elems.pop_back();
		} else {
		mustSort = false;
		elems[i] = NamedAttribute(elems[i].getName(), value);
		}
		break;
		}
		}
		if (!found) {
		if (!value)
		return;
		elems.emplace_back(name, value);
		}
		if (mustSort) {
		DictionaryAttr::sortInPlace(elems);
		}
		auto newDict = DictionaryAttr::getWithSorted(ctx, elems);
		setAttributionAttrs(op, index, newDict, attrsName);
		}

		void GPUFuncOp::setWorkgroupAttributionAttr(unsigned index, StringAttr name,
		Attribute value) {
		assert(index < getNumWorkgroupAttributions() &&
		"index must map to a workgroup attribution");
		setAttributionAttr(*this, index, name, value,
		getWorkgroupAttribAttrsAttrName());
		}

		void GPUFuncOp::setPrivateAttributionAttr(unsigned index, StringAttr name,
		Attribute value) {
		assert(index < getNumPrivateAttributions() &&
		"index must map to a private attribution");
		setAttributionAttr(*this, index, name, value,
		getPrivateAttribAttrsAttrName());
		}

LogicalResult GPUFuncOp::verifyType() {		LogicalResult GPUFuncOp::verifyType() {
if (isKernel() && getFunctionType().getNumResults() != 0)		if (isKernel() && getFunctionType().getNumResults() != 0)
return emitOpError() << "expected void return type for kernel function";		return emitOpError() << "expected void return type for kernel function";

return success();		return success();
}		}

/// Verifies the body of the function.		/// Verifies the body of the function.
▲ Show 20 Lines • Show All 404 Lines • Show Last 20 Lines

mlir/test/Conversion/GPUCommon/memory-attrbution.mlir

Show First 20 Lines • Show All 219 Lines • ▼ Show 20 Lines	gpu.func @multiple(%arg0: f32)
%c0 = arith.constant 0 : index		%c0 = arith.constant 0 : index
memref.store %arg0, %arg1[%c0] : memref<1xf32, #gpu.address_space<workgroup>>		memref.store %arg0, %arg1[%c0] : memref<1xf32, #gpu.address_space<workgroup>>
memref.store %arg0, %arg2[%c0] : memref<2xf32, #gpu.address_space<workgroup>>		memref.store %arg0, %arg2[%c0] : memref<2xf32, #gpu.address_space<workgroup>>
memref.store %arg0, %arg3[%c0] : memref<3xf32, #gpu.address_space<private>>		memref.store %arg0, %arg3[%c0] : memref<3xf32, #gpu.address_space<private>>
memref.store %arg0, %arg4[%c0] : memref<4xf32, #gpu.address_space<private>>		memref.store %arg0, %arg4[%c0] : memref<4xf32, #gpu.address_space<private>>
"terminator"() : () -> ()		"terminator"() : () -> ()
}		}
}		}

		// -----

		gpu.module @kernel {
		// Check that alignment attributes are set correctly
		// NVVM: llvm.mlir.global internal @[[$buffer:.*]]()
		// NVVM-SAME: addr_space = 3
		// NVVM-SAME: alignment = 8
		// NVVM-SAME: !llvm.array<48 x f32>

		// ROCDL: llvm.mlir.global internal @[[$buffer:.*]]()
		// ROCDL-SAME: addr_space = 3
		// ROCDL-SAME: alignment = 8
		// ROCDL-SAME: !llvm.array<48 x f32>

		// NVVM-LABEL: llvm.func @explicitAlign
		// ROCDL-LABEL: llvm.func @explicitAlign
		gpu.func @explicitAlign(%arg0 : index)
		workgroup(%arg1: memref<48xf32, #gpu.address_space<workgroup>> {llvm.align = 8 : i64})
		private(%arg2: memref<48xf32, #gpu.address_space<private>> {llvm.align = 4 : i64}) {
		// NVVM: %[[size:.*]] = llvm.mlir.constant(48 : i64) : i64
		// NVVM: %[[raw:.*]] = llvm.alloca %[[size]] x f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr

		// ROCDL: %[[size:.*]] = llvm.mlir.constant(48 : i64) : i64
		// ROCDL: %[[raw:.*]] = llvm.alloca %[[size]] x f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr<5>

		%val = memref.load %arg1[%arg0] : memref<48xf32, #gpu.address_space<workgroup>>
		memref.store %val, %arg2[%arg0] : memref<48xf32, #gpu.address_space<private>>
		"terminator"() : () -> ()
		}
		}