Index: llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -42,6 +42,7 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
@@ -282,6 +283,23 @@
     // so remove the variables from these lists before replaceAllUsesWith
     removeFromUsedLists(M, LocalVars);
 
+    // Create alias.scope and their lists. Each field in the new structure
+    // does not alias any other field.
+    SmallVector<MDNode *> AliasScopes;
+    SmallVector<Metadata *> NoAliasList;
+    if (LocalVars.size() > 1) {
+      MDBuilder MDB(Ctx);
+      AliasScopes.reserve(LocalVars.size());
+      for (size_t I = 0; I < LocalVars.size(); I++) {
+        MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
+        MDNode *Scope = MDB.createAnonymousAliasScope(Domain);
+        AliasScopes.push_back(Scope);
+      }
+      NoAliasList.append(&AliasScopes[1], AliasScopes.end());
+    } else {
+      AliasScopes.push_back(nullptr);
+    }
+
     // Replace uses of ith variable with a constantexpr to the ith field of the
     // instance that will be allocated by AMDGPUMachineFunction
     Type *I32 = Type::getInt32Ty(Ctx);
@@ -313,7 +331,13 @@
       uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I);
       Align A = commonAlignment(StructAlign, Off);
-      refineUsesAlignment(GEP, A, DL);
+
+      if (I)
+        NoAliasList[I - 1] = AliasScopes[I - 1];
+      MDNode *NoAlias = NoAliasList.empty() ? nullptr
+                                            : MDNode::get(Ctx, NoAliasList);
+
+      refineUsesAlignmentAndAA(GEP, A, DL, AliasScopes[I], NoAlias);
     }
 
     // Mark kernels with asm that reads the address of the allocated structure
@@ -334,12 +358,26 @@
     return true;
   }
 
-  void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL,
-                           unsigned MaxDepth = 5) {
-    if (!MaxDepth || A == 1)
+  void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
+                                MDNode *AliasScope, MDNode *NoAlias,
+                                unsigned MaxDepth = 5) {
+    if (!MaxDepth || (A == 1 && !AliasScope))
       return;
 
     for (User *U : Ptr->users()) {
+      if (auto *I = dyn_cast<Instruction>(U)) {
+        if (AliasScope && I->mayReadOrWriteMemory()) {
+          MDNode *AS =
+              MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope),
+                                  AliasScope);
+          I->setMetadata(LLVMContext::MD_alias_scope, AS);
+          MDNode *NA =
+              MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias),
+                                  NoAlias);
+          I->setMetadata(LLVMContext::MD_noalias, NA);
+        }
+      }
+
       if (auto *LI = dyn_cast<LoadInst>(U)) {
         LI->setAlignment(std::max(A, LI->getAlign()));
         continue;
@@ -364,17 +402,20 @@
       if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
         unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
         APInt Off(BitWidth, 0);
-        if (GEP->getPointerOperand() == Ptr &&
-            GEP->accumulateConstantOffset(DL, Off)) {
-          Align GA = commonAlignment(A, Off.getLimitedValue());
-          refineUsesAlignment(GEP, GA, DL, MaxDepth - 1);
+        if (GEP->getPointerOperand() == Ptr) {
+          Align GA;
+          if (GEP->accumulateConstantOffset(DL, Off))
+            GA = commonAlignment(A, Off.getLimitedValue());
+          refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,
+                                   MaxDepth - 1);
         }
         continue;
       }
       if (auto *I = dyn_cast<Instruction>(U)) {
         if (I->getOpcode() == Instruction::BitCast ||
             I->getOpcode() == Instruction::AddrSpaceCast)
-          refineUsesAlignment(I, A, DL, MaxDepth - 1);
+          refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias,
+                                   MaxDepth - 1);
       }
     }
   }
Index: llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
@@ -0,0 +1,77 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
+
+@a = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
+@b = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
+@c = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
+
+; GCN-LABEL: {{^}}no_clobber_ds_load_stores_x2:
+; GCN: ds_write2st64_b32
+; GCN: ds_read2st64_b32
+
+; CHECK-LABEL: @no_clobber_ds_load_stores_x2
+; CHECK: store i32 1, i32 addrspace(3)* %0, align 16, !alias.scope !0, !noalias !2
+; CHECK: %val.a = load i32, i32 addrspace(3)* %gep.a, align 4, !alias.scope !0, !noalias !2
+; CHECK: store i32 2, i32 addrspace(3)* %1, align 16, !alias.scope !3, !noalias !5
+; CHECK: %val.b = load i32, i32 addrspace(3)* %gep.b, align 4, !alias.scope !3, !noalias !5
+
+define amdgpu_kernel void @no_clobber_ds_load_stores_x2(i32 addrspace(1)* %arg, i32 %i) {
+bb:
+  store i32 1, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 0), align 4
+  %gep.a = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 %i
+  %val.a = load i32, i32 addrspace(3)* %gep.a, align 4
+  store i32 2, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 0), align 4
+  %gep.b = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 %i
+  %val.b = load i32, i32 addrspace(3)* %gep.b, align 4
+  %val = add i32 %val.a, %val.b
+  store i32 %val, i32 addrspace(1)* %arg, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}no_clobber_ds_load_stores_x3:
+; GCN-DAG: ds_write2st64_b32
+; GCN-DAG: ds_write_b32
+; GCN-DAG: ds_read2st64_b32
+; GCN-DAG: ds_read_b32
+
+; CHECK-LABEL: @no_clobber_ds_load_stores_x3
+; CHECK: store i32 1, i32 addrspace(3)* %0, align 16, !alias.scope !6, !noalias !8
+; CHECK: %val.a = load i32, i32 addrspace(3)* %gep.a, align 4, !alias.scope !6, !noalias !8
+; CHECK: store i32 2, i32 addrspace(3)* %1, align 16, !alias.scope !9, !noalias !13
+; CHECK: %val.b = load i32, i32 addrspace(3)* %gep.b, align 4, !alias.scope !9, !noalias !13
+; CHECK: store i32 3, i32 addrspace(3)* %2, align 16, !alias.scope !11, !noalias !14
+; CHECK: %val.c = load i32, i32 addrspace(3)* %gep.c, align 4, !alias.scope !11, !noalias !14
+
+define amdgpu_kernel void @no_clobber_ds_load_stores_x3(i32 addrspace(1)* %arg, i32 %i) {
+bb:
+  store i32 1, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 0), align 4
+  %gep.a = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @a, i32 0, i32 %i
+  %val.a = load i32, i32 addrspace(3)* %gep.a, align 4
+  store i32 2, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 0), align 4
+  %gep.b = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @b, i32 0, i32 %i
+  %val.b = load i32, i32 addrspace(3)* %gep.b, align 4
+  store i32 3, i32 addrspace(3)* getelementptr inbounds ([64 x i32], [64 x i32] addrspace(3)* @c, i32 0, i32 0), align 4
+  %gep.c = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* @c, i32 0, i32 %i
+  %val.c = load i32, i32 addrspace(3)* %gep.c, align 4
+  %val.1 = add i32 %val.a, %val.b
+  %val = add i32 %val.1, %val.c
+  store i32 %val, i32 addrspace(1)* %arg, align 4
+  ret void
+}
+
+; CHECK: !0 = distinct !{!0, !1}
+; CHECK: !1 = distinct !{!1}
+; CHECK: !2 = !{!3}
+; CHECK: !3 = distinct !{!3, !4}
+; CHECK: !4 = distinct !{!4}
+; CHECK: !5 = !{!0}
+; CHECK: !6 = distinct !{!6, !7}
+; CHECK: !7 = distinct !{!7}
+; CHECK: !8 = !{!9, !11}
+; CHECK: !9 = distinct !{!9, !10}
+; CHECK: !10 = distinct !{!10}
+; CHECK: !11 = distinct !{!11, !12}
+; CHECK: !12 = distinct !{!12}
+; CHECK: !13 = !{!6, !11}
+; CHECK: !14 = !{!6, !9}
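
Note on the NoAliasList update in the patch: the list is seeded once as {S1 .. SN-1}, i.e. every scope except field 0's, and the per-iteration write NoAliasList[I - 1] = AliasScopes[I - 1] restores the previous field's scope into the slot that until then held the current field's scope, so the list is always "every scope except S[I]" without being rebuilt. Below is a minimal standalone sketch of that invariant, assuming only the MDBuilder/MDNode APIs the patch already uses; the helper name buildFieldScopes is hypothetical, and unlike the pass (which skips metadata creation entirely for a single variable) it creates a scope even for one field.

#include <utility>
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Hypothetical helper mirroring the pass: for each of NumFields struct
// fields, produce the pair (alias.scope node, noalias list node). The
// noalias list for field I holds every scope except AliasScopes[I].
static SmallVector<std::pair<MDNode *, MDNode *>, 4>
buildFieldScopes(LLVMContext &Ctx, size_t NumFields) {
  MDBuilder MDB(Ctx);
  SmallVector<MDNode *, 4> AliasScopes;
  for (size_t I = 0; I < NumFields; I++) {
    // One anonymous domain per scope, matching the metadata the test
    // expects (!0 = distinct !{!0, !1}, !1 = distinct !{!1}, ...).
    MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
    AliasScopes.push_back(MDB.createAnonymousAliasScope(Domain));
  }

  // Seed with {S1 .. SN-1}: the noalias list for field 0.
  SmallVector<Metadata *, 4> NoAliasList;
  if (NumFields > 1)
    NoAliasList.append(AliasScopes.begin() + 1, AliasScopes.end());

  SmallVector<std::pair<MDNode *, MDNode *>, 4> Scopes;
  for (size_t I = 0; I < NumFields; I++) {
    // Restoring S[I-1] overwrites the slot that held S[I], preserving the
    // invariant "NoAliasList == every scope except S[I]".
    if (I)
      NoAliasList[I - 1] = AliasScopes[I - 1];
    MDNode *NoAlias =
        NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
    Scopes.push_back({AliasScopes[I], NoAlias});
  }
  return Scopes;
}

MDNode::concatenate in refineUsesAlignmentAndAA unions the new scope with whatever !alias.scope / !noalias metadata an instruction already carries, so pre-existing scoped-AA information is preserved rather than replaced. The per-field scopes are what lets ScopedNoAliasAA prove the @a/@b/@c accesses independent once they share one LDS struct, which is what allows the backend to form the ds_read2/ds_write2 pairs the GCN checks above expect.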