Index: llvm/include/llvm/Analysis/ScopedNoAliasAA.h =================================================================== --- llvm/include/llvm/Analysis/ScopedNoAliasAA.h +++ llvm/include/llvm/Analysis/ScopedNoAliasAA.h @@ -25,6 +25,27 @@ class MDNode; class MemoryLocation; +/// This is a simple wrapper around an MDNode which provides a higher-level +/// interface by hiding the details of how alias analysis information is encoded +/// in its operands. +class AliasScopeNode { + const MDNode *Node = nullptr; + +public: + AliasScopeNode() = default; + explicit AliasScopeNode(const MDNode *N) : Node(N) {} + + /// Get the MDNode for this AliasScopeNode. + const MDNode *getNode() const { return Node; } + + /// Get the MDNode for this AliasScopeNode's domain. + const MDNode *getDomain() const { + if (Node->getNumOperands() < 2) + return nullptr; + return dyn_cast_or_null<MDNode>(Node->getOperand(1)); + } +}; + /// A simple AA result which uses scoped-noalias metadata to answer queries. class ScopedNoAliasAAResult : public AAResultBase<ScopedNoAliasAAResult> { friend AAResultBase<ScopedNoAliasAAResult>; Index: llvm/lib/Analysis/ScopedNoAliasAA.cpp =================================================================== --- llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -50,31 +50,6 @@ static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias", cl::init(true), cl::Hidden); -namespace { - -/// This is a simple wrapper around an MDNode which provides a higher-level -/// interface by hiding the details of how alias analysis information is encoded -/// in its operands. -class AliasScopeNode { - const MDNode *Node = nullptr; - -public: - AliasScopeNode() = default; - explicit AliasScopeNode(const MDNode *N) : Node(N) {} - - /// Get the MDNode for this AliasScopeNode. - const MDNode *getNode() const { return Node; } - - /// Get the MDNode for this AliasScopeNode's domain. - const MDNode *getDomain() const { - if (Node->getNumOperands() < 2) - return nullptr; - return dyn_cast_or_null<MDNode>(Node->getOperand(1)); - } -}; - -} // end anonymous namespace - AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA, const MemoryLocation &LocB, AAQueryInfo &AAQI) { Index: llvm/lib/IR/Metadata.cpp =================================================================== --- llvm/lib/IR/Metadata.cpp +++ llvm/lib/IR/Metadata.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -926,7 +927,32 @@ if (!A || !B) return nullptr; - return concatenate(A, B); + // Take the intersection of domains then union the scopes + // within those domains + SmallPtrSet<const MDNode *, 16> ADomains; + SmallPtrSet<const MDNode *, 16> IntersectDomains; + SmallSetVector<Metadata *, 4> MDs; + for (const MDOperand &MDOp : A->operands()) + if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp)) + if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain()) + ADomains.insert(Domain); + + for (const MDOperand &MDOp : B->operands()) + if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp)) + if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain()) + if (ADomains.contains(Domain)) { + IntersectDomains.insert(Domain); + MDs.insert(MDOp); + } + + for (const MDOperand &MDOp : A->operands()) + if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp)) + if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain()) + if (IntersectDomains.contains(Domain)) + MDs.insert(MDOp); + + return MDs.empty() ? nullptr + : getOrSelfReference(A->getContext(), MDs.getArrayRef()); } MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) { Index: llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -S -memcpyopt | FileCheck --match-full-lines %s + +; Alias scopes are merged by taking the intersection of domains, then the union of the scopes within those domains +define i8 @test(i8 %input) { + %tmp = alloca i8 + %dst = alloca i8 + %src = alloca i8 +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false), !alias.scope ![[SCOPE:[0-9]+]] + call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %src), !noalias !4 + store i8 %input, i8* %src + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1 false), !alias.scope !0 + call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1 false), !alias.scope !4 + %ret_value = load i8, i8* %dst + ret i8 %ret_value +} + +; Merged scope contains "callee0: %a" and "callee0 : %b" +; CHECK: ![[SCOPE]] = !{!{{[0-9]+}}, !{{[0-9]+}}} + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) + +!0 = !{!1, !7} +!1 = distinct !{!1, !3, !"callee0: %a"} +!2 = distinct !{!2, !3, !"callee0: %b"} +!3 = distinct !{!3, !"callee0"} + +!4 = !{!2, !5} +!5 = distinct !{!5, !6, !"callee1: %a"} +!6 = distinct !{!6, !"callee1"} + +!7 = distinct !{!7, !8, !"callee2: %a"} +!8 = distinct !{!8, !"callee2"} Index: llvm/test/Transforms/GVN/noalias.ll =================================================================== --- llvm/test/Transforms/GVN/noalias.ll +++ llvm/test/Transforms/GVN/noalias.ll @@ -5,7 +5,7 @@ ; CHECK: load i32, i32* %p ; CHECK-NOT: noalias ; CHECK: %c = add i32 %a, %a - %a = load i32, i32* %p, !noalias !0 + %a = load i32, i32* %p, !noalias !3 %b = load i32, i32* %p %c = add i32 %a, %b ret i32 %c @@ -13,31 +13,32 @@ define i32 @test2(i32* %p, i32* %q) { ; CHECK-LABEL: @test2(i32* %p, i32* %q) -; CHECK: load i32, i32* %p, align 4, !alias.scope !0 +; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE1:[0-9]+]] ; CHECK: %c = add i32 %a, %a - %a = load i32, i32* %p, !alias.scope !0 - %b = load i32, i32* %p, !alias.scope !0 + %a = load i32, i32* %p, !alias.scope !3 + %b = load i32, i32* %p, !alias.scope !3 %c = add i32 %a, %b ret i32 %c } -; FIXME: In this case we can do better than intersecting the scopes, and can -; concatenate them instead. Both loads are in the same basic block, the first -; makes the second safe to speculatively execute, and there are no calls that may -; throw in between. define i32 @test3(i32* %p, i32* %q) { ; CHECK-LABEL: @test3(i32* %p, i32* %q) -; CHECK: load i32, i32* %p, align 4, !alias.scope !1 +; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE2:[0-9]+]] ; CHECK: %c = add i32 %a, %a - %a = load i32, i32* %p, !alias.scope !1 - %b = load i32, i32* %p, !alias.scope !2 + %a = load i32, i32* %p, !alias.scope !4 + %b = load i32, i32* %p, !alias.scope !5 %c = add i32 %a, %b ret i32 %c } +; CHECK: ![[SCOPE1]] = !{!{{[0-9]+}}} +; CHECK: ![[SCOPE2]] = !{!{{[0-9]+}}, !{{[0-9]+}}} declare i32 @foo(i32*) readonly -!0 = !{!0} -!1 = !{!1} -!2 = !{!0, !1} +!0 = distinct !{!0, !2, !"callee0: %a"} +!1 = distinct !{!1, !2, !"callee0: %b"} +!2 = distinct !{!2, !"callee0"} +!3 = !{!0} +!4 = !{!1} +!5 = !{!0, !1} Index: llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll =================================================================== --- llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll +++ llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll @@ -40,10 +40,10 @@ ; CHECK: ![[TBAA]] = !{![[TAG1:[0-9]+]], ![[TAG1]], i64 0} ; CHECK: ![[TAG1]] = !{!"int", !{{[0-9]+}}, i64 0} ; CHECK: ![[RANGE]] = !{i32 10, i32 25} -; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE2:[0-9]+]], ![[SCOPE1:[0-9]+]]} +; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE1:[0-9]+]], ![[SCOPE2:[0-9]+]]} ; CHECK: ![[SCOPE0]] = distinct !{![[SCOPE0]], !{{[0-9]+}}, !"scope0"} -; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"} ; CHECK: ![[SCOPE1]] = distinct !{![[SCOPE1]], !{{[0-9]+}}, !"scope1"} +; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"} ; CHECK: ![[NOALIAS]] = !{![[SCOPE3:[0-9]+]]} ; CHECK: ![[SCOPE3]] = distinct !{![[SCOPE3]], !{{[0-9]+}}, !"scope3"} Index: llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -S -memcpyopt | FileCheck --match-full-lines %s + +; Make sure callslot optimization merges alias.scope metadata correctly when it merges instructions. +; Merging here naively generates: +; call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false), !alias.scope !3 +; call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !0 +; ... +; !0 = !{!1} +; !1 = distinct !{!1, !2, !"callee1: %a"} +; !2 = distinct !{!2, !"callee1"} +; !3 = !{!1, !4} +; !4 = distinct !{!4, !5, !"callee0: %a"} +; !5 = distinct !{!5, !"callee0"} +; Which is incorrect because the lifetime.end of %src will now "noalias" the above memcpy. +define i8 @test(i8 %input) { + %tmp = alloca i8 + %dst = alloca i8 + %src = alloca i8 +; NOTE: we're matching the full line and looking for the lack of !alias.scope here +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false) + call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %src), !noalias !3 + store i8 %input, i8* %src + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1 false), !alias.scope !0 + call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !3 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1 false), !alias.scope !3 + %ret_value = load i8, i8* %dst + ret i8 %ret_value +} + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) + +!0 = !{!1} +!1 = distinct !{!1, !2, !"callee0: %a"} +!2 = distinct !{!2, !"callee0"} +!3 = !{!4} +!4 = distinct !{!4, !5, !"callee1: %a"} +!5 = distinct !{!5, !"callee1"} Index: llvm/test/Transforms/NewGVN/noalias.ll =================================================================== --- llvm/test/Transforms/NewGVN/noalias.ll +++ llvm/test/Transforms/NewGVN/noalias.ll @@ -5,7 +5,7 @@ ; CHECK: load i32, i32* %p ; CHECK-NOT: noalias ; CHECK: %c = add i32 %a, %a - %a = load i32, i32* %p, !noalias !0 + %a = load i32, i32* %p, !noalias !3 %b = load i32, i32* %p %c = add i32 %a, %b ret i32 %c @@ -13,31 +13,32 @@ define i32 @test2(i32* %p, i32* %q) { ; CHECK-LABEL: @test2(i32* %p, i32* %q) -; CHECK: load i32, i32* %p, align 4, !alias.scope !0 +; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE1:[0-9]+]] ; CHECK: %c = add i32 %a, %a - %a = load i32, i32* %p, !alias.scope !0 - %b = load i32, i32* %p, !alias.scope !0 + %a = load i32, i32* %p, !alias.scope !3 + %b = load i32, i32* %p, !alias.scope !3 %c = add i32 %a, %b ret i32 %c } -; FIXME: In this case we can do better than intersecting the scopes, and can -; concatenate them instead. Both loads are in the same basic block, the first -; makes the second safe to speculatively execute, and there are no calls that may -; throw in between. define i32 @test3(i32* %p, i32* %q) { ; CHECK-LABEL: @test3(i32* %p, i32* %q) -; CHECK: load i32, i32* %p, align 4, !alias.scope !1 +; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE2:[0-9]+]] ; CHECK: %c = add i32 %a, %a - %a = load i32, i32* %p, !alias.scope !1 - %b = load i32, i32* %p, !alias.scope !2 + %a = load i32, i32* %p, !alias.scope !4 + %b = load i32, i32* %p, !alias.scope !5 %c = add i32 %a, %b ret i32 %c } +; CHECK: ![[SCOPE1]] = !{!{{[0-9]+}}} +; CHECK: ![[SCOPE2]] = !{!{{[0-9]+}}, !{{[0-9]+}}} declare i32 @foo(i32*) readonly -!0 = !{!0} -!1 = !{!1} -!2 = !{!0, !1} +!0 = distinct !{!0, !2, !"callee0: %a"} +!1 = distinct !{!1, !2, !"callee0: %b"} +!2 = distinct !{!2, !"callee0"} +!3 = !{!0} +!4 = !{!1} +!5 = !{!0, !1}