diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -252,7 +252,9 @@
 
   let arguments = (ins TypeAttrOf<FunctionType>:$function_type,
                        OptionalAttr<DictArrayAttr>:$arg_attrs,
-                       OptionalAttr<DictArrayAttr>:$res_attrs);
+                       OptionalAttr<DictArrayAttr>:$res_attrs,
+                       OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
+                       OptionalAttr<DictArrayAttr>:$private_attrib_attrs);
   let regions = (region AnyRegion:$body);
 
   let skipDefaultBuilders = 1;
@@ -279,11 +281,17 @@
       return attr ? attr.getInt() : 0;
     }
 
+    /// Return the position, in the block argument list, of the first workgroup
+    /// attribution, which equals the number of inputs of the function type.
+    unsigned getFirstWorkgroupAttributionIndex() {
+      return getFunctionType().getNumInputs();
+    }
+
     /// Returns a list of block arguments that correspond to buffers located in
     /// the workgroup memory
     ArrayRef<BlockArgument> getWorkgroupAttributions() {
       auto begin =
-          std::next(getBody().args_begin(), getFunctionType().getNumInputs());
+          std::next(getBody().args_begin(), getFirstWorkgroupAttributionIndex());
       auto end = std::next(begin, getNumWorkgroupAttributions());
       return {begin, end};
     }
@@ -292,20 +300,47 @@
     /// workgroup memory.
     BlockArgument addWorkgroupAttribution(Type type, Location loc);
 
+    /// Get the attribute dictionary of the workgroup attribution at `index`
+    /// (counted from the first workgroup attribution); null if none is stored.
+    DictionaryAttr getworkgroupAttributionAttrs(unsigned index);
+
+    /// Set the attribute dictionary of the workgroup attribution at `index`,
+    /// counted from the first workgroup attribution. Null `value` means empty.
+    void setworkgroupAttributionAttrs(unsigned index, DictionaryAttr value);
+
+    /// Get attribute `name` of the `index`-th workgroup attribution (0-based).
+    Attribute getWorkgroupAttributionAttr(unsigned index, StringAttr name);
+    Attribute getWorkgroupAttributionAttr(unsigned index, StringRef name) {
+      StringAttr nameAttr = StringAttr::get((*this)->getContext(), name);
+      return getWorkgroupAttributionAttr(index, nameAttr);
+    }
+
+    /// Set attribute `name` on workgroup attribution `index` (counted from the
+    /// first workgroup attribution); a null `value` removes the attribute.
+    void setWorkgroupAttributionAttr(unsigned index, StringAttr name, Attribute value);
+    void setWorkgroupAttributionAttr(unsigned index, StringRef name, Attribute value) {
+      StringAttr nameAttr = StringAttr::get((*this)->getContext(), name);
+      setWorkgroupAttributionAttr(index, nameAttr, value);
+    }
+
     /// Returns the number of buffers located in the private memory.
     unsigned getNumPrivateAttributions() {
       return getBody().getNumArguments() - getFunctionType().getNumInputs() -
           getNumWorkgroupAttributions();
     }
 
+    /// Returns the block-argument index of the first private attribution.
+    unsigned getFirstPrivateAttributionIndex() {
+      // Private attributions follow the inputs and workgroup attributions.
+      unsigned numInputs = getFunctionType().getNumInputs();
+      return numInputs + getNumWorkgroupAttributions();
+    }
+
     /// Returns a list of block arguments that correspond to buffers located in
     /// the private memory.
     ArrayRef<BlockArgument> getPrivateAttributions() {
-      // Buffers on the private memory always come after buffers on the workgroup
-      // memory.
       auto begin =
-          std::next(getBody().args_begin(),
-                    getFunctionType().getNumInputs() + getNumWorkgroupAttributions());
+          std::next(getBody().args_begin(), getFirstPrivateAttributionIndex());
       return {begin, getBody().args_end()};
     }
 
@@ -313,6 +348,29 @@
     /// private memory.
     BlockArgument addPrivateAttribution(Type type, Location loc);
 
+    /// Get the attribute dictionary of the private attribution at `index`
+    /// (counted from the first private attribution); null if none is stored.
+    DictionaryAttr getPrivateAttributionAttrs(unsigned index);
+
+    /// Set the attribute dictionary of the private attribution at `index`,
+    /// counted from the first private attribution. Null `value` means empty.
+    void setPrivateAttributionAttrs(unsigned index, DictionaryAttr value);
+
+    /// Get attribute `name` of the `index`-th private attribution (0-based).
+    Attribute getPrivateAttributionAttr(unsigned index, StringAttr name);
+    Attribute getPrivateAttributionAttr(unsigned index, StringRef name) {
+      StringAttr nameAttr = StringAttr::get((*this)->getContext(), name);
+      return getPrivateAttributionAttr(index, nameAttr);
+    }
+
+    /// Set attribute `name` on private attribution `index` (counted from the
+    /// first private attribution); a null `value` removes the attribute.
+    void setPrivateAttributionAttr(unsigned index, StringAttr name, Attribute value);
+    void setPrivateAttributionAttr(unsigned index, StringRef name, Attribute value) {
+      StringAttr nameAttr = StringAttr::get((*this)->getContext(), name);
+      setPrivateAttributionAttr(index, nameAttr, value);
+    }
+
     /// Returns the name of the attribute containing the number of buffers
     /// located in the workgroup memory.
     static StringRef getNumWorkgroupAttributionsAttrName() {
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -24,7 +24,7 @@
   SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
   workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
   for (const auto &en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
-    Value attribution = en.value();
+    BlockArgument attribution = en.value();
 
     auto type = attribution.getType().dyn_cast<MemRefType>();
     assert(type && type.hasStaticShape() && "unexpected type in attribution");
@@ -36,10 +36,17 @@
     auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
     std::string name = std::string(
         llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index()));
+    uint64_t alignment = 0;
+    if (auto alignAttr =
+            gpuFuncOp
+                .getWorkgroupAttributionAttr(
+                    en.index(), LLVM::LLVMDialect::getAlignAttrName())
+                .dyn_cast_or_null<IntegerAttr>())
+      alignment = alignAttr.getInt();
     auto globalOp = rewriter.create<LLVM::GlobalOp>(
         gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
-        LLVM::Linkage::Internal, name, /*value=*/Attribute(),
-        /*alignment=*/0, workgroupAddrSpace);
+        LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
+        workgroupAddrSpace);
     workgroupBuffers.push_back(globalOp);
   }
 
@@ -56,7 +63,10 @@
   for (const auto &attr : gpuFuncOp->getAttrs()) {
     if (attr.getName() == SymbolTable::getSymbolAttrName() ||
         attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
-        attr.getName() == gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName())
+        attr.getName() ==
+            gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
+        attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
+        attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName())
       continue;
     attributes.push_back(attr);
   }
@@ -124,9 +134,15 @@
           getTypeConverter()->getPointerType(elementType, allocaAddrSpace);
       Value numElements = rewriter.create<LLVM::ConstantOp>(
           gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
+      uint64_t alignment = 0;
+      if (auto alignAttr =
+              gpuFuncOp
+                  .getPrivateAttributionAttr(
+                      en.index(), LLVM::LLVMDialect::getAlignAttrName())
+                  .dyn_cast_or_null<IntegerAttr>())
+        alignment = alignAttr.getInt();
       Value allocated = rewriter.create<LLVM::AllocaOp>(
-          gpuFuncOp.getLoc(), ptrType, elementType, numElements,
-          /*alignment=*/0);
+          gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment);
       auto descr = MemRefDescriptor::fromStaticShape(
           rewriter, loc, *getTypeConverter(), type, allocated);
       signatureConversion.remapInput(
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -903,13 +903,38 @@
 /// keyword provided as argument.
 static ParseResult
 parseAttributions(OpAsmParser &parser, StringRef keyword,
-                  SmallVectorImpl<OpAsmParser::Argument> &args) {
+                  SmallVectorImpl<OpAsmParser::Argument> &args,
+                  Attribute &attributionAttrs) {
   // If we could not parse the keyword, just assume empty list and succeed.
   if (failed(parser.parseOptionalKeyword(keyword)))
     return success();
 
-  return parser.parseArgumentList(args, OpAsmParser::Delimiter::Paren,
-                                  /*allowType=*/true);
+  // Arguments parsed below are appended after any entries already in `args`.
+  const size_t firstNewArg = args.size();
+  if (failed(parser.parseArgumentList(args, OpAsmParser::Delimiter::Paren,
+                                      /*allowType=*/true,
+                                      /*allowAttrs=*/true)))
+    return failure();
+  auto newArgs = ArrayRef(args).drop_front(firstNewArg);
+
+  // Without any non-empty dictionary there is nothing to record: leave the
+  // attribute null so that the caller does not attach it to the operation.
+  auto hasAttrs = [](const OpAsmParser::Argument &arg) {
+    return arg.attrs && !arg.attrs.empty();
+  };
+  if (llvm::none_of(newArgs, hasAttrs)) {
+    attributionAttrs = nullptr;
+    return success();
+  }
+
+  // Otherwise emit one dictionary per attribution, substituting an empty
+  // dictionary for attributions that carry no attributes.
+  Builder &builder = parser.getBuilder();
+  SmallVector<Attribute> dicts;
+  for (const OpAsmParser::Argument &arg : newArgs)
+    dicts.push_back(arg.attrs ? arg.attrs : builder.getDictionaryAttr({}));
+  attributionAttrs = builder.getArrayAttr(dicts);
+  return success();
 }
 
 /// Parses a GPU function.
@@ -954,9 +979,10 @@
       builder, result, entryArgs, resultAttrs, getArgAttrsAttrName(result.name),
       getResAttrsAttrName(result.name));
 
+  Attribute workgroupAttributionAttrs;
   // Parse workgroup memory attributions.
   if (failed(parseAttributions(parser, GPUFuncOp::getWorkgroupKeyword(),
-                               entryArgs)))
+                               entryArgs, workgroupAttributionAttrs)))
     return failure();
 
   // Store the number of operands we just parsed as the number of workgroup
@@ -964,11 +990,18 @@
   unsigned numWorkgroupAttrs = entryArgs.size() - type.getNumInputs();
   result.addAttribute(GPUFuncOp::getNumWorkgroupAttributionsAttrName(),
                       builder.getI64IntegerAttr(numWorkgroupAttrs));
+  if (workgroupAttributionAttrs)
+    result.addAttribute(GPUFuncOp::getWorkgroupAttribAttrsAttrName(result.name),
+                        workgroupAttributionAttrs);
 
+  Attribute privateAttributionAttrs;
   // Parse private memory attributions.
-  if (failed(
-          parseAttributions(parser, GPUFuncOp::getPrivateKeyword(), entryArgs)))
+  if (failed(parseAttributions(parser, GPUFuncOp::getPrivateKeyword(),
+                               entryArgs, privateAttributionAttrs)))
     return failure();
+  if (privateAttributionAttrs)
+    result.addAttribute(GPUFuncOp::getPrivateAttribAttrsAttrName(result.name),
+                        privateAttributionAttrs);
 
   // Parse the kernel attribute if present.
   if (succeeded(parser.parseOptionalKeyword(GPUFuncOp::getKernelKeyword())))
@@ -986,13 +1019,24 @@
 }
 
 static void printAttributions(OpAsmPrinter &p, StringRef keyword,
-                              ArrayRef<BlockArgument> values) {
+                              ArrayRef<BlockArgument> values,
+                              ArrayAttr attributes) {
   if (values.empty())
     return;
 
   p << ' ' << keyword << '(';
   llvm::interleaveComma(
-      values, p, [&p](BlockArgument v) { p << v << " : " << v.getType(); });
+      llvm::enumerate(values), p, [&](auto indexedArg) {
+        BlockArgument arg = indexedArg.value();
+        p << arg << " : " << arg.getType();
+
+        // No dictionary is stored for this attribution: nothing more to print.
+        size_t position = indexedArg.index();
+        if (!attributes || position >= attributes.size())
+          return;
+        if (auto dict = attributes[position].cast<DictionaryAttr>())
+          p.printOptionalAttrDict(dict.getValue());
+      });
   p << ')';
 }
 
@@ -1005,8 +1049,10 @@
                                                   /*isVariadic=*/false,
                                                   type.getResults());
 
-  printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
-  printAttributions(p, getPrivateKeyword(), getPrivateAttributions());
+  printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions(),
+                    getWorkgroupAttribAttrs().value_or(nullptr));
+  printAttributions(p, getPrivateKeyword(), getPrivateAttributions(),
+                    getPrivateAttribAttrs().value_or(nullptr));
   if (isKernel())
     p << ' ' << getKernelKeyword();
 
@@ -1014,11 +1060,130 @@
       p, *this,
       {getNumWorkgroupAttributionsAttrName(),
        GPUDialect::getKernelFuncAttrName(), getFunctionTypeAttrName(),
-       getArgAttrsAttrName(), getResAttrsAttrName()});
+       getArgAttrsAttrName(), getResAttrsAttrName(),
+       getWorkgroupAttribAttrsAttrName(), getPrivateAttribAttrsAttrName()});
   p << ' ';
   p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
 }
 
+/// Looks up entry `index` of array attribute `attrName`; null if it is absent.
+static DictionaryAttr getAttributionAttrs(GPUFuncOp op, unsigned index,
+                                          StringAttr attrName) {
+  auto dicts = op->getAttr(attrName).dyn_cast_or_null<ArrayAttr>();
+  bool present = dicts && index < dicts.size();
+  return present ? dicts[index].cast<DictionaryAttr>() : DictionaryAttr();
+}
+
+DictionaryAttr GPUFuncOp::getworkgroupAttributionAttrs(unsigned index) {
+  return getAttributionAttrs(*this, index, getWorkgroupAttribAttrsAttrName());
+}
+
+DictionaryAttr GPUFuncOp::getPrivateAttributionAttrs(unsigned index) {
+  return getAttributionAttrs(*this, index, getPrivateAttribAttrsAttrName());
+}
+
+/// Stores `value` as entry `index` of the array attribute `attrName` on `op`.
+/// Missing entries up to `index` are filled with empty dictionaries, and a
+/// null `value` is stored as an empty dictionary.
+static void setAttributionAttrs(GPUFuncOp op, unsigned index,
+                                DictionaryAttr value, StringAttr attrName) {
+  MLIRContext *ctx = op.getContext();
+  DictionaryAttr emptyDict = DictionaryAttr::get(ctx);
+  SmallVector<Attribute> dicts;
+  if (auto current = op->getAttr(attrName).dyn_cast_or_null<ArrayAttr>())
+    dicts.append(current.begin(), current.end());
+  // Pad the array so that `index` becomes a valid position.
+  if (dicts.size() <= index)
+    dicts.resize(index + 1, emptyDict);
+  dicts[index] = value ? value : emptyDict;
+  op->setAttr(attrName, ArrayAttr::get(ctx, dicts));
+}
+
+void GPUFuncOp::setworkgroupAttributionAttrs(unsigned index,
+                                             DictionaryAttr value) {
+  setAttributionAttrs(*this, index, value, getWorkgroupAttribAttrsAttrName());
+}
+
+void GPUFuncOp::setPrivateAttributionAttrs(unsigned index,
+                                           DictionaryAttr value) {
+  setAttributionAttrs(*this, index, value, getPrivateAttribAttrsAttrName());
+}
+
+/// Returns entry `name` of the dictionary stored for attribution `index` in
+/// array attribute `attrsName`; null if the dictionary or entry is missing.
+static Attribute getAttributionAttr(GPUFuncOp op, unsigned index,
+                                    StringAttr name, StringAttr attrsName) {
+  DictionaryAttr attrs = getAttributionAttrs(op, index, attrsName);
+  return attrs ? attrs.get(name) : Attribute();
+}
+
+Attribute GPUFuncOp::getWorkgroupAttributionAttr(unsigned index,
+                                                 StringAttr name) {
+  assert(index < getNumWorkgroupAttributions() &&
+         "index must map to a workgroup attribution");
+  StringAttr arrayName = getWorkgroupAttribAttrsAttrName();
+  return getAttributionAttr(*this, index, name, arrayName);
+}
+
+Attribute GPUFuncOp::getPrivateAttributionAttr(unsigned index,
+                                               StringAttr name) {
+  assert(index < getNumPrivateAttributions() &&
+         "index must map to a private attribution");
+  StringAttr arrayName = getPrivateAttribAttrsAttrName();
+  return getAttributionAttr(*this, index, name, arrayName);
+}
+
+/// Updates a single named entry in the attribute dictionary that belongs to
+/// the attribution at `index` inside the array attribute `attrsName` of `op`.
+///  - A non-null `value` replaces the entry `name` or adds it when missing.
+///  - A null `value` erases the entry `name`; when there is no such entry the
+///    operation is left untouched.
+static void setAttributionAttr(GPUFuncOp op, unsigned index, StringAttr name,
+                               Attribute value, StringAttr attrsName) {
+  // Start from the entries currently stored for this attribution, if any.
+  // `DictionaryAttr` keeps its entries sorted by name, so `elems` is sorted.
+  SmallVector<NamedAttribute> elems;
+  if (DictionaryAttr oldDict = getAttributionAttrs(op, index, attrsName))
+    elems.append(oldDict.getValue().begin(), oldDict.getValue().end());
+
+  auto *existing = llvm::find_if(
+      elems, [&](NamedAttribute elem) { return elem.getName() == name; });
+  if (existing != elems.end()) {
+    // Replacing a value or erasing an element both keep the sorted order, so
+    // no re-sort is needed on this path.
+    if (value)
+      *existing = NamedAttribute(name, value);
+    else
+      elems.erase(existing);
+  } else {
+    // Removing an entry that does not exist is a no-op.
+    if (!value)
+      return;
+    // A new entry is appended at the end and must be moved into place.
+    elems.emplace_back(name, value);
+    DictionaryAttr::sortInPlace(elems);
+  }
+
+  auto newDict = DictionaryAttr::getWithSorted(op.getContext(), elems);
+  setAttributionAttrs(op, index, newDict, attrsName);
+}
+
+void GPUFuncOp::setWorkgroupAttributionAttr(unsigned index, StringAttr name,
+                                            Attribute value) {
+  assert(index < getNumWorkgroupAttributions() &&
+         "index must map to a workgroup attribution");
+  StringAttr arrayName = getWorkgroupAttribAttrsAttrName();
+  setAttributionAttr(*this, index, name, value, arrayName);
+}
+
+void GPUFuncOp::setPrivateAttributionAttr(unsigned index, StringAttr name,
+                                          Attribute value) {
+  assert(index < getNumPrivateAttributions() &&
+         "index must map to a private attribution");
+  StringAttr arrayName = getPrivateAttribAttrsAttrName();
+  setAttributionAttr(*this, index, name, value, arrayName);
+}
+
 LogicalResult GPUFuncOp::verifyType() {
   if (isKernel() && getFunctionType().getNumResults() != 0)
     return emitOpError() << "expected void return type for kernel function";
diff --git a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
--- a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
+++ b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
@@ -225,3 +225,34 @@
     "terminator"() : () -> ()
   }
 }
+
+// -----
+
+gpu.module @kernel {
+  // Check that `llvm.align` on an attribution becomes the buffer's alignment.
+  // NVVM: llvm.mlir.global internal @[[$buffer:.*]]()
+  // NVVM-SAME:  addr_space = 3
+  // NVVM-SAME:  alignment = 8
+  // NVVM-SAME:  !llvm.array<48 x f32>
+
+  // ROCDL: llvm.mlir.global internal @[[$buffer:.*]]()
+  // ROCDL-SAME:  addr_space = 3
+  // ROCDL-SAME:  alignment = 8
+  // ROCDL-SAME:  !llvm.array<48 x f32>
+
+  // NVVM-LABEL: llvm.func @explicitAlign
+  // ROCDL-LABEL: llvm.func @explicitAlign
+  gpu.func @explicitAlign(%arg0 : index)
+    workgroup(%arg1: memref<48xf32, #gpu.address_space<workgroup>> {llvm.align = 8 : i64})
+    private(%arg2: memref<48xf32, #gpu.address_space<private>> {llvm.align = 4 : i64}) {
+    // NVVM: %[[size:.*]] = llvm.mlir.constant(48 : i64) : i64
+    // NVVM: %[[raw:.*]] = llvm.alloca %[[size]] x f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+
+    // ROCDL: %[[size:.*]] = llvm.mlir.constant(48 : i64) : i64
+    // ROCDL: %[[raw:.*]] = llvm.alloca %[[size]] x f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr<5>
+
+    %val = memref.load %arg1[%arg0] : memref<48xf32, #gpu.address_space<workgroup>>
+    memref.store %val, %arg2[%arg0] : memref<48xf32, #gpu.address_space<private>>
+    "terminator"() : () -> ()
+  }
+}