Index: llvm/include/llvm/Analysis/ScopedNoAliasAA.h
===================================================================
--- llvm/include/llvm/Analysis/ScopedNoAliasAA.h
+++ llvm/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -25,6 +25,27 @@
 class MDNode;
 class MemoryLocation;
 
+/// Thin wrapper around a const MDNode pointer describing one alias scope. It
+/// hides how the scope's information is encoded in the node's operands. A
+/// default-constructed wrapper holds a null node.
+class AliasScopeNode {
+  const MDNode *Node = nullptr;
+
+public:
+  AliasScopeNode() = default;
+  explicit AliasScopeNode(const MDNode *N) : Node(N) {}
+
+  /// Get the wrapped MDNode (null for a default-constructed wrapper).
+  const MDNode *getNode() const { return Node; }
+
+  /// Get the domain MDNode (operand 1); null if there is no such node.
+  const MDNode *getDomain() const {
+    if (!Node || Node->getNumOperands() < 2)
+      return nullptr;
+    return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+  }
+};
+
 /// A simple AA result which uses scoped-noalias metadata to answer queries.
 class ScopedNoAliasAAResult : public AAResultBase<ScopedNoAliasAAResult> {
   friend AAResultBase<ScopedNoAliasAAResult>;
Index: llvm/lib/Analysis/ScopedNoAliasAA.cpp
===================================================================
--- llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -50,31 +50,6 @@
 static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias",
                                          cl::init(true), cl::Hidden);
 
-namespace {
-
-/// This is a simple wrapper around an MDNode which provides a higher-level
-/// interface by hiding the details of how alias analysis information is encoded
-/// in its operands.
-class AliasScopeNode {
-  const MDNode *Node = nullptr;
-
-public:
-  AliasScopeNode() = default;
-  explicit AliasScopeNode(const MDNode *N) : Node(N) {}
-
-  /// Get the MDNode for this AliasScopeNode.
-  const MDNode *getNode() const { return Node; }
-
-  /// Get the MDNode for this AliasScopeNode's domain.
-  const MDNode *getDomain() const {
-    if (Node->getNumOperands() < 2)
-      return nullptr;
-    return dyn_cast_or_null<MDNode>(Node->getOperand(1));
-  }
-};
-
-} // end anonymous namespace
-
 AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
                                          const MemoryLocation &LocB,
                                          AAQueryInfo &AAQI) {
Index: llvm/lib/IR/Metadata.cpp
===================================================================
--- llvm/lib/IR/Metadata.cpp
+++ llvm/lib/IR/Metadata.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -926,7 +927,32 @@
   if (!A || !B)
     return nullptr;
 
-  return concatenate(A, B);
+  // Take the intersection of the domains referenced by A and B, then union
+  // the scopes (from both A and B) that belong to those shared domains.
+  SmallPtrSet<const MDNode *, 16> ADomains;
+  SmallPtrSet<const MDNode *, 16> IntersectDomains;
+  SmallSetVector<Metadata *, 4> MDs;
+  for (const MDOperand &MDOp : A->operands())
+    if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
+      if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+        ADomains.insert(Domain);
+
+  for (const MDOperand &MDOp : B->operands())
+    if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
+      if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+        if (ADomains.contains(Domain)) {
+          IntersectDomains.insert(Domain);
+          MDs.insert(MDOp);
+        }
+
+  for (const MDOperand &MDOp : A->operands())
+    if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
+      if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+        if (IntersectDomains.contains(Domain))
+          MDs.insert(MDOp);
+
+  return MDs.empty() ? nullptr
+                     : getOrSelfReference(A->getContext(), MDs.getArrayRef());
 }
 
 MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
Index: llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -S -memcpyopt | FileCheck --match-full-lines %s
+
+; Alias scopes are merged by taking the intersection of domains, then the union of the scopes within those domains
+define i8 @test(i8 %input) {
+  %tmp = alloca i8
+  %dst = alloca i8
+  %src = alloca i8
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false), !alias.scope ![[SCOPE:[0-9]+]]
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %src), !noalias !4
+  store i8 %input, i8* %src
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1 false), !alias.scope !0
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1 false), !alias.scope !4
+  %ret_value = load i8, i8* %dst
+  ret i8 %ret_value
+}
+
+; Merged scope contains "callee0: %a" and "callee0: %b"
+; CHECK:   ![[SCOPE]] = !{!{{[0-9]+}}, !{{[0-9]+}}}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
+
+!0 = !{!1, !7}
+!1 = distinct !{!1, !3, !"callee0: %a"}
+!2 = distinct !{!2, !3, !"callee0: %b"}
+!3 = distinct !{!3, !"callee0"}
+
+!4 = !{!2, !5}
+!5 = distinct !{!5, !6, !"callee1: %a"}
+!6 = distinct !{!6, !"callee1"}
+
+!7 = distinct !{!7, !8, !"callee2: %a"}
+!8 = distinct !{!8, !"callee2"}
Index: llvm/test/Transforms/GVN/noalias.ll
===================================================================
--- llvm/test/Transforms/GVN/noalias.ll
+++ llvm/test/Transforms/GVN/noalias.ll
@@ -5,7 +5,7 @@
 ; CHECK: load i32, i32* %p
 ; CHECK-NOT: noalias
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !noalias !0
+  %a = load i32, i32* %p, !noalias !3
   %b = load i32, i32* %p
   %c = add i32 %a, %b
   ret i32 %c
@@ -13,31 +13,32 @@
 
 define i32 @test2(i32* %p, i32* %q) {
 ; CHECK-LABEL: @test2(i32* %p, i32* %q)
-; CHECK: load i32, i32* %p, align 4, !alias.scope !0
+; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE1:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !alias.scope !0
-  %b = load i32, i32* %p, !alias.scope !0
+  %a = load i32, i32* %p, !alias.scope !3
+  %b = load i32, i32* %p, !alias.scope !3
   %c = add i32 %a, %b
   ret i32 %c
 }
 
-; FIXME: In this case we can do better than intersecting the scopes, and can
-; concatenate them instead. Both loads are in the same basic block, the first
-; makes the second safe to speculatively execute, and there are no calls that may
-; throw in between.
 define i32 @test3(i32* %p, i32* %q) {
 ; CHECK-LABEL: @test3(i32* %p, i32* %q)
-; CHECK: load i32, i32* %p, align 4, !alias.scope !1
+; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE2:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !alias.scope !1
-  %b = load i32, i32* %p, !alias.scope !2
+  %a = load i32, i32* %p, !alias.scope !4
+  %b = load i32, i32* %p, !alias.scope !5
   %c = add i32 %a, %b
   ret i32 %c
 }
 
+; CHECK:   ![[SCOPE1]] = !{!{{[0-9]+}}}
+; CHECK:   ![[SCOPE2]] = !{!{{[0-9]+}}, !{{[0-9]+}}}
 declare i32 @foo(i32*) readonly
 
-!0 = !{!0}
-!1 = !{!1}
-!2 = !{!0, !1}
+!0 = distinct !{!0, !2, !"callee0: %a"}
+!1 = distinct !{!1, !2, !"callee0: %b"}
+!2 = distinct !{!2, !"callee0"}
 
+!3 = !{!0}
+!4 = !{!1}
+!5 = !{!0, !1}
Index: llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
===================================================================
--- llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
+++ llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
@@ -40,10 +40,10 @@
 ; CHECK: ![[TBAA]] = !{![[TAG1:[0-9]+]], ![[TAG1]], i64 0}
 ; CHECK: ![[TAG1]] = !{!"int", !{{[0-9]+}}, i64 0}
 ; CHECK: ![[RANGE]] = !{i32 10, i32 25}
-; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE2:[0-9]+]], ![[SCOPE1:[0-9]+]]}
+; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE1:[0-9]+]], ![[SCOPE2:[0-9]+]]}
 ; CHECK: ![[SCOPE0]] = distinct !{![[SCOPE0]], !{{[0-9]+}}, !"scope0"}
-; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"}
 ; CHECK: ![[SCOPE1]] = distinct !{![[SCOPE1]], !{{[0-9]+}}, !"scope1"}
+; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"}
 ; CHECK: ![[NOALIAS]] = !{![[SCOPE3:[0-9]+]]}
 ; CHECK: ![[SCOPE3]] = distinct !{![[SCOPE3]], !{{[0-9]+}}, !"scope3"}
 
Index: llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -S -memcpyopt | FileCheck --match-full-lines %s
+
+; Make sure callslot optimization merges alias.scope metadata correctly when it merges instructions.
+; Merging here naively generates:
+;  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false), !alias.scope !3
+;  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !0
+;   ...
+;  !0 = !{!1}
+;  !1 = distinct !{!1, !2, !"callee1: %a"}
+;  !2 = distinct !{!2, !"callee1"}
+;  !3 = !{!1, !4}
+;  !4 = distinct !{!4, !5, !"callee0: %a"}
+;  !5 = distinct !{!5, !"callee0"}
+; Which is incorrect because the lifetime.end of %src will now "noalias" the above memcpy.
+define i8 @test(i8 %input) {
+  %tmp = alloca i8
+  %dst = alloca i8
+  %src = alloca i8
+; NOTE: we're matching the full line and looking for the lack of !alias.scope here
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false)
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %src), !noalias !3
+  store i8 %input, i8* %src
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1 false), !alias.scope !0
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !3
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1 false), !alias.scope !3
+  %ret_value = load i8, i8* %dst
+  ret i8 %ret_value
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
+
+!0 = !{!1}
+!1 = distinct !{!1, !2, !"callee0: %a"}
+!2 = distinct !{!2, !"callee0"}
+!3 = !{!4}
+!4 = distinct !{!4, !5, !"callee1: %a"}
+!5 = distinct !{!5, !"callee1"}
Index: llvm/test/Transforms/NewGVN/noalias.ll
===================================================================
--- llvm/test/Transforms/NewGVN/noalias.ll
+++ llvm/test/Transforms/NewGVN/noalias.ll
@@ -5,7 +5,7 @@
 ; CHECK: load i32, i32* %p
 ; CHECK-NOT: noalias
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !noalias !0
+  %a = load i32, i32* %p, !noalias !3
   %b = load i32, i32* %p
   %c = add i32 %a, %b
   ret i32 %c
@@ -13,31 +13,32 @@
 
 define i32 @test2(i32* %p, i32* %q) {
 ; CHECK-LABEL: @test2(i32* %p, i32* %q)
-; CHECK: load i32, i32* %p, align 4, !alias.scope !0
+; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE1:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !alias.scope !0
-  %b = load i32, i32* %p, !alias.scope !0
+  %a = load i32, i32* %p, !alias.scope !3
+  %b = load i32, i32* %p, !alias.scope !3
   %c = add i32 %a, %b
   ret i32 %c
 }
 
-; FIXME: In this case we can do better than intersecting the scopes, and can
-; concatenate them instead. Both loads are in the same basic block, the first
-; makes the second safe to speculatively execute, and there are no calls that may
-; throw in between.
 define i32 @test3(i32* %p, i32* %q) {
 ; CHECK-LABEL: @test3(i32* %p, i32* %q)
-; CHECK: load i32, i32* %p, align 4, !alias.scope !1
+; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE2:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !alias.scope !1
-  %b = load i32, i32* %p, !alias.scope !2
+  %a = load i32, i32* %p, !alias.scope !4
+  %b = load i32, i32* %p, !alias.scope !5
   %c = add i32 %a, %b
   ret i32 %c
 }
 
+; CHECK:   ![[SCOPE1]] = !{!{{[0-9]+}}}
+; CHECK:   ![[SCOPE2]] = !{!{{[0-9]+}}, !{{[0-9]+}}}
 declare i32 @foo(i32*) readonly
 
-!0 = !{!0}
-!1 = !{!1}
-!2 = !{!0, !1}
+!0 = distinct !{!0, !2, !"callee0: %a"}
+!1 = distinct !{!1, !2, !"callee0: %b"}
+!2 = distinct !{!2, !"callee0"}
 
+!3 = !{!0}
+!4 = !{!1}
+!5 = !{!0, !1}