diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -252,7 +252,9 @@ let arguments = (ins TypeAttrOf:$function_type, OptionalAttr:$arg_attrs, - OptionalAttr:$res_attrs); + OptionalAttr:$res_attrs, + OptionalAttr:$workgroup_attrib_attrs, + OptionalAttr:$private_attrib_attrs); let regions = (region AnyRegion:$body); let skipDefaultBuilders = 1; @@ -279,11 +281,17 @@ return attr ? attr.getInt() : 0; } + /// Return the index of the first workgroup attribution in the block argument + /// list. + unsigned getFirstWorkgroupAttributionIndex() { + return getFunctionType().getNumInputs(); + } + /// Returns a list of block arguments that correspond to buffers located in /// the workgroup memory ArrayRef getWorkgroupAttributions() { auto begin = - std::next(getBody().args_begin(), getFunctionType().getNumInputs()); + std::next(getBody().args_begin(), getFirstWorkgroupAttributionIndex()); auto end = std::next(begin, getNumWorkgroupAttributions()); return {begin, end}; } @@ -292,20 +300,47 @@ /// workgroup memory. BlockArgument addWorkgroupAttribution(Type type, Location loc); + /// Get the workgroup attribution attribute dictionary for the attribution + /// at index `index`, counted from the start of the workgroup attributions. + DictionaryAttr getworkgroupAttributionAttrs(unsigned index); + + /// Set the workgroup attribution attribute dictionary for the attribution + /// at index `index`, counted from the start of the workgroup attributions. + void setworkgroupAttributionAttrs(unsigned index, DictionaryAttr value); + + /// Get an attribute for a workgroup attribution. `index` is counted + /// from the start of the workgroup attributions, not the start of the block. 
+ Attribute getWorkgroupAttributionAttr(unsigned index, StringAttr name); + Attribute getWorkgroupAttributionAttr(unsigned index, StringRef name) { + return getWorkgroupAttributionAttr(index, StringAttr::get((*this)->getContext(), name)); + } + + /// Set an attribute for a workgroup attribution. `index` is counted + /// from the start of the workgroup attributions, not the start of the block. + /// A null `value` removes an attribution attribute. + void setWorkgroupAttributionAttr(unsigned index, StringAttr name, Attribute value); + void setWorkgroupAttributionAttr(unsigned index, StringRef name, Attribute value) { + return setWorkgroupAttributionAttr(index, StringAttr::get((*this)->getContext(), name), value); + } + /// Returns the number of buffers located in the private memory. unsigned getNumPrivateAttributions() { return getBody().getNumArguments() - getFunctionType().getNumInputs() - getNumWorkgroupAttributions(); } + /// Returns the index of the first private buffer in the block argument list. + unsigned getFirstPrivateAttributionIndex() { + // Buffers on the private memory always come after buffers on the workgroup + // memory. + return getFunctionType().getNumInputs() + getNumWorkgroupAttributions(); + } + /// Returns a list of block arguments that correspond to buffers located in /// the private memory. ArrayRef getPrivateAttributions() { - // Buffers on the private memory always come after buffers on the workgroup - // memory. auto begin = - std::next(getBody().args_begin(), - getFunctionType().getNumInputs() + getNumWorkgroupAttributions()); + std::next(getBody().args_begin(), getFirstPrivateAttributionIndex()); return {begin, getBody().args_end()}; } @@ -313,6 +348,29 @@ /// private memory. BlockArgument addPrivateAttribution(Type type, Location loc); + /// Get the private attribution attribute dictionary for the attribution + /// at index `index`, counted from the start of the private attributions.
+ DictionaryAttr getPrivateAttributionAttrs(unsigned index); + + /// Set the private attribution attribute dictionary for the attribution + /// at index `index`, counted from the start of the private attributions. + void setPrivateAttributionAttrs(unsigned index, DictionaryAttr value); + + /// Get an attribute for a private attribution. `index` is counted + /// from the start of the private attributions, not the start of the block. + Attribute getPrivateAttributionAttr(unsigned index, StringAttr name); + Attribute getPrivateAttributionAttr(unsigned index, StringRef name) { + return getPrivateAttributionAttr(index, StringAttr::get((*this)->getContext(), name)); + } + + /// Set an attribute for a private attribution. `index` is counted + /// from the start of the private attributions, not the start of the block. + /// A null `value` removes an attribute. + void setPrivateAttributionAttr(unsigned index, StringAttr name, Attribute value); + void setPrivateAttributionAttr(unsigned index, StringRef name, Attribute value) { + return setPrivateAttributionAttr(index, StringAttr::get((*this)->getContext(), name), value); + } + /// Returns the name of the attribute containing the number of buffers /// located in the workgroup memory. 
static StringRef getNumWorkgroupAttributionsAttrName() { diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -24,7 +24,7 @@ SmallVector workgroupBuffers; workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); for (const auto &en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { - Value attribution = en.value(); + BlockArgument attribution = en.value(); auto type = attribution.getType().dyn_cast(); assert(type && type.hasStaticShape() && "unexpected type in attribution"); @@ -36,10 +36,17 @@ auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements); std::string name = std::string( llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index())); + uint64_t alignment = 0; + if (auto alignAttr = + gpuFuncOp + .getWorkgroupAttributionAttr( + en.index(), LLVM::LLVMDialect::getAlignAttrName()) + .dyn_cast_or_null()) + alignment = alignAttr.getInt(); auto globalOp = rewriter.create( gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false, - LLVM::Linkage::Internal, name, /*value=*/Attribute(), - /*alignment=*/0, workgroupAddrSpace); + LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment, + workgroupAddrSpace); workgroupBuffers.push_back(globalOp); } @@ -56,7 +63,10 @@ for (const auto &attr : gpuFuncOp->getAttrs()) { if (attr.getName() == SymbolTable::getSymbolAttrName() || attr.getName() == gpuFuncOp.getFunctionTypeAttrName() || - attr.getName() == gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName()) + attr.getName() == + gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() || + attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() || + attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName()) continue; attributes.push_back(attr); } @@ -124,9 +134,15 @@ getTypeConverter()->getPointerType(elementType, allocaAddrSpace); Value numElements = 
rewriter.create( gpuFuncOp.getLoc(), int64Ty, type.getNumElements()); + uint64_t alignment = 0; + if (auto alignAttr = + gpuFuncOp + .getPrivateAttributionAttr( + en.index(), LLVM::LLVMDialect::getAlignAttrName()) + .dyn_cast_or_null()) + alignment = alignAttr.getInt(); Value allocated = rewriter.create( - gpuFuncOp.getLoc(), ptrType, elementType, numElements, - /*alignment=*/0); + gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment); auto descr = MemRefDescriptor::fromStaticShape( rewriter, loc, *getTypeConverter(), type, allocated); signatureConversion.remapInput( diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -903,13 +903,38 @@ /// keyword provided as argument. static ParseResult parseAttributions(OpAsmParser &parser, StringRef keyword, - SmallVectorImpl &args) { + SmallVectorImpl &args, + Attribute &attributionAttrs) { // If we could not parse the keyword, just assume empty list and succeed. 
if (failed(parser.parseOptionalKeyword(keyword))) return success(); - return parser.parseArgumentList(args, OpAsmParser::Delimiter::Paren, - /*allowType=*/true); + size_t existingArgs = args.size(); + ParseResult result = + parser.parseArgumentList(args, OpAsmParser::Delimiter::Paren, + /*allowType=*/true, /*allowAttrs=*/true); + if (failed(result)) + return result; + + bool hadAttrs = llvm::any_of(ArrayRef(args).drop_front(existingArgs), + [](const OpAsmParser::Argument &arg) -> bool { + return arg.attrs && !arg.attrs.empty(); + }); + if (!hadAttrs) { + attributionAttrs = nullptr; + return result; + } + + Builder &builder = parser.getBuilder(); + SmallVector attributionAttrsVec; + for (const auto &argument : ArrayRef(args).drop_front(existingArgs)) { + if (!argument.attrs) + attributionAttrsVec.push_back(builder.getDictionaryAttr({})); + else + attributionAttrsVec.push_back(argument.attrs); + } + attributionAttrs = builder.getArrayAttr(attributionAttrsVec); + return result; } /// Parses a GPU function. @@ -954,9 +979,10 @@ builder, result, entryArgs, resultAttrs, getArgAttrsAttrName(result.name), getResAttrsAttrName(result.name)); + Attribute workgroupAttributionAttrs; // Parse workgroup memory attributions. if (failed(parseAttributions(parser, GPUFuncOp::getWorkgroupKeyword(), - entryArgs))) + entryArgs, workgroupAttributionAttrs))) return failure(); // Store the number of operands we just parsed as the number of workgroup @@ -964,11 +990,18 @@ unsigned numWorkgroupAttrs = entryArgs.size() - type.getNumInputs(); result.addAttribute(GPUFuncOp::getNumWorkgroupAttributionsAttrName(), builder.getI64IntegerAttr(numWorkgroupAttrs)); + if (workgroupAttributionAttrs) + result.addAttribute(GPUFuncOp::getWorkgroupAttribAttrsAttrName(result.name), + workgroupAttributionAttrs); + Attribute privateAttributionAttrs; // Parse private memory attributions. 
- if (failed( - parseAttributions(parser, GPUFuncOp::getPrivateKeyword(), entryArgs))) + if (failed(parseAttributions(parser, GPUFuncOp::getPrivateKeyword(), + entryArgs, privateAttributionAttrs))) return failure(); + if (privateAttributionAttrs) + result.addAttribute(GPUFuncOp::getPrivateAttribAttrsAttrName(result.name), + privateAttributionAttrs); // Parse the kernel attribute if present. if (succeeded(parser.parseOptionalKeyword(GPUFuncOp::getKernelKeyword()))) @@ -986,13 +1019,24 @@ } static void printAttributions(OpAsmPrinter &p, StringRef keyword, - ArrayRef values) { + ArrayRef values, + ArrayAttr attributes) { if (values.empty()) return; p << ' ' << keyword << '('; llvm::interleaveComma( - values, p, [&p](BlockArgument v) { p << v << " : " << v.getType(); }); + llvm::enumerate(values), p, [&p, attributes](auto pair) { + BlockArgument v = pair.value(); + p << v << " : " << v.getType(); + + size_t attributionIndex = pair.index(); + DictionaryAttr attrs; + if (attributes && attributionIndex < attributes.size()) + attrs = attributes[attributionIndex].cast(); + if (attrs) + p.printOptionalAttrDict(attrs.getValue()); + }); p << ')'; } @@ -1005,8 +1049,10 @@ /*isVariadic=*/false, type.getResults()); - printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions()); - printAttributions(p, getPrivateKeyword(), getPrivateAttributions()); + printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions(), + getWorkgroupAttribAttrs().value_or(nullptr)); + printAttributions(p, getPrivateKeyword(), getPrivateAttributions(), + getPrivateAttribAttrs().value_or(nullptr)); if (isKernel()) p << ' ' << getKernelKeyword(); @@ -1014,11 +1060,130 @@ p, *this, {getNumWorkgroupAttributionsAttrName(), GPUDialect::getKernelFuncAttrName(), getFunctionTypeAttrName(), - getArgAttrsAttrName(), getResAttrsAttrName()}); + getArgAttrsAttrName(), getResAttrsAttrName(), + getWorkgroupAttribAttrsAttrName(), getPrivateAttribAttrsAttrName()}); p << ' '; p.printRegion(getBody(), 
/*printEntryBlockArgs=*/false); } +static DictionaryAttr getAttributionAttrs(GPUFuncOp op, unsigned index, + StringAttr attrName) { + auto allAttrs = op->getAttr(attrName).dyn_cast_or_null(); + if (!allAttrs || index >= allAttrs.size()) + return DictionaryAttr(); + return allAttrs[index].cast(); +} + +DictionaryAttr GPUFuncOp::getworkgroupAttributionAttrs(unsigned index) { + return getAttributionAttrs(*this, index, getWorkgroupAttribAttrsAttrName()); +} + +DictionaryAttr GPUFuncOp::getPrivateAttributionAttrs(unsigned index) { + return getAttributionAttrs(*this, index, getPrivateAttribAttrsAttrName()); +} + +static void setAttributionAttrs(GPUFuncOp op, unsigned index, + DictionaryAttr value, StringAttr attrName) { + MLIRContext *ctx = op.getContext(); + auto allAttrs = op->getAttr(attrName).dyn_cast_or_null(); + SmallVector elements; + if (allAttrs) + elements.append(allAttrs.begin(), allAttrs.end()); + while (elements.size() <= index) + elements.push_back(DictionaryAttr::get(ctx)); + if (!value) + elements[index] = DictionaryAttr::get(ctx); + else + elements[index] = value; + ArrayAttr newValue = ArrayAttr::get(ctx, elements); + op->setAttr(attrName, newValue); +} + +void GPUFuncOp::setworkgroupAttributionAttrs(unsigned index, + DictionaryAttr value) { + setAttributionAttrs(*this, index, value, getWorkgroupAttribAttrsAttrName()); +} + +void GPUFuncOp::setPrivateAttributionAttrs(unsigned int index, + DictionaryAttr value) { + setAttributionAttrs(*this, index, value, getPrivateAttribAttrsAttrName()); +} + +static Attribute getAttributionAttr(GPUFuncOp op, unsigned index, + StringAttr name, StringAttr attrsName) { + DictionaryAttr dict = getAttributionAttrs(op, index, attrsName); + if (!dict) + return Attribute(); + return dict.get(name); +} + +Attribute GPUFuncOp::getWorkgroupAttributionAttr(unsigned index, + StringAttr name) { + assert(index < getNumWorkgroupAttributions() && + "index must map to a workgroup attribution"); + return getAttributionAttr(*this, index, 
name, + getWorkgroupAttribAttrsAttrName()); +} + +Attribute GPUFuncOp::getPrivateAttributionAttr(unsigned index, + StringAttr name) { + assert(index < getNumPrivateAttributions() && + "index must map to a private attribution"); + return getAttributionAttr(*this, index, name, + getPrivateAttribAttrsAttrName()); +} + +static void setAttributionAttr(GPUFuncOp op, unsigned index, StringAttr name, + Attribute value, StringAttr attrsName) { + MLIRContext *ctx = op.getContext(); + SmallVector elems; + DictionaryAttr oldDict = getAttributionAttrs(op, index, attrsName); + if (oldDict) + elems.append(oldDict.getValue().begin(), oldDict.getValue().end()); + + bool found = false; + bool mustSort = true; + for (unsigned i = 0, e = elems.size(); i < e; ++i) { + if (elems[i].getName() == name) { + found = true; + if (!value) { + std::swap(elems[i], elems[elems.size() - 1]); + elems.pop_back(); + } else { + mustSort = false; + elems[i] = NamedAttribute(elems[i].getName(), value); + } + break; + } + } + if (!found) { + if (!value) + return; + elems.emplace_back(name, value); + } + if (mustSort) { + DictionaryAttr::sortInPlace(elems); + } + auto newDict = DictionaryAttr::getWithSorted(ctx, elems); + setAttributionAttrs(op, index, newDict, attrsName); +} + +void GPUFuncOp::setWorkgroupAttributionAttr(unsigned index, StringAttr name, + Attribute value) { + assert(index < getNumWorkgroupAttributions() && + "index must map to a workgroup attribution"); + setAttributionAttr(*this, index, name, value, + getWorkgroupAttribAttrsAttrName()); +} + +void GPUFuncOp::setPrivateAttributionAttr(unsigned index, StringAttr name, + Attribute value) { + assert(index < getNumPrivateAttributions() && + "index must map to a private attribution"); + setAttributionAttr(*this, index, name, value, + getPrivateAttribAttrsAttrName()); +} + LogicalResult GPUFuncOp::verifyType() { if (isKernel() && getFunctionType().getNumResults() != 0) return emitOpError() << "expected void return type for kernel function"; 
diff --git a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir --- a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir +++ b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir @@ -225,3 +225,34 @@ "terminator"() : () -> () } } + +// ----- + +gpu.module @kernel { + // Check that alignment attributes are set correctly + // NVVM: llvm.mlir.global internal @[[$buffer:.*]]() + // NVVM-SAME: addr_space = 3 + // NVVM-SAME: alignment = 8 + // NVVM-SAME: !llvm.array<48 x f32> + + // ROCDL: llvm.mlir.global internal @[[$buffer:.*]]() + // ROCDL-SAME: addr_space = 3 + // ROCDL-SAME: alignment = 8 + // ROCDL-SAME: !llvm.array<48 x f32> + + // NVVM-LABEL: llvm.func @explicitAlign + // ROCDL-LABEL: llvm.func @explicitAlign + gpu.func @explicitAlign(%arg0 : index) + workgroup(%arg1: memref<48xf32, #gpu.address_space> {llvm.align = 8 : i64}) + private(%arg2: memref<48xf32, #gpu.address_space> {llvm.align = 4 : i64}) { + // NVVM: %[[size:.*]] = llvm.mlir.constant(48 : i64) : i64 + // NVVM: %[[raw:.*]] = llvm.alloca %[[size]] x f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr + + // ROCDL: %[[size:.*]] = llvm.mlir.constant(48 : i64) : i64 + // ROCDL: %[[raw:.*]] = llvm.alloca %[[size]] x f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr<5> + + %val = memref.load %arg1[%arg0] : memref<48xf32, #gpu.address_space> + memref.store %val, %arg2[%arg0] : memref<48xf32, #gpu.address_space> + "terminator"() : () -> () + } +}