diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -1039,7 +1039,8 @@
   // updated if a new clobber is found by this SkipSelf search. If this
   // additional query becomes heavily used we may decide to cache the result.
   // Walker instantiations will decide how to set the SkipSelf bool.
-  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool);
+  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool,
+                                              bool UseInvariantGroup = true);
 };

 /// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
@@ -1064,6 +1065,11 @@
                                             unsigned &UWL) {
     return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
   }
+  // This method is not accessible outside of this file.
+  MemoryAccess *getClobberingMemoryAccessWithoutInvariantGroup(MemoryAccess *MA,
+                                                               unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, UWL, false, false);
+  }

   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
     unsigned UpwardWalkLimit = MaxCheckLimit;
@@ -1460,10 +1466,13 @@
   unsigned UpwardWalkLimit = MaxCheckLimit;
   while (UpperBound > LocInfo.LowerBound) {
     if (isa<MemoryPhi>(VersionStack[UpperBound])) {
-      // For phis, use the walker, see where we ended up, go there
+      // For phis, use the walker, see where we ended up, go there.
+      // The invariant.group handling in MemorySSA is ad-hoc and doesn't
+      // support updates, so don't use it to optimize uses.
       MemoryAccess *Result =
-          Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit);
-      // We are guaranteed to find it or something is wrong
+          Walker->getClobberingMemoryAccessWithoutInvariantGroup(
+              MU, UpwardWalkLimit);
+      // We are guaranteed to find it or something is wrong.
       while (VersionStack[UpperBound] != Result) {
         assert(UpperBound != 0);
         --UpperBound;
@@ -2469,15 +2478,93 @@
   return Clobber;
 }

+static Instruction *getInvariantGroupClobberingInstruction(Instruction &I,
+                                                           DominatorTree &DT) {
+  if (!I.hasMetadata(LLVMContext::MD_invariant_group))
+    return nullptr;
+
+  // We consider bitcasts and zero GEPs to be the same pointer value. Start by
+  // stripping bitcasts and zero GEPs, then we will recursively look at loads
+  // and stores through bitcasts and zero GEPs.
+  Value *PointerOperand = getLoadStorePointerOperand(&I)->stripPointerCasts();
+
+  // It's not safe to walk the use list of a global value because function
+  // passes aren't allowed to look outside their functions.
+  // FIXME: this could be fixed by filtering instructions from outside of the
+  // current function.
+  if (isa<GlobalValue>(PointerOperand))
+    return nullptr;
+
+  // Queue to process all pointers that are equivalent to the load operand.
+  SmallVector<const Value *, 8> PointerUsesQueue;
+  PointerUsesQueue.push_back(PointerOperand);
+
+  Instruction *MostDominatingInstruction = &I;
+
+  // FIXME: This loop is O(N^2) because dominates can be O(n) and in the worst
+  // case we will see all the instructions. It may not matter in practice. If
+  // it does, we will have to support MemorySSA construction and updates.
+  while (!PointerUsesQueue.empty()) {
+    const Value *Ptr = PointerUsesQueue.pop_back_val();
+    assert(Ptr && !isa<GlobalValue>(Ptr) &&
+           "Null or GlobalValue should not be inserted");
+
+    for (const Use &Us : Ptr->uses()) {
+      auto *U = dyn_cast<Instruction>(Us.getUser());
+      if (!U || U == &I || !DT.dominates(U, MostDominatingInstruction))
+        continue;
+
+      // Add bitcasts and zero GEPs to queue.
+      if (isa<BitCastInst>(U)) {
+        PointerUsesQueue.push_back(U);
+        continue;
+      }
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+        if (GEP->hasAllZeroIndices()) {
+          PointerUsesQueue.push_back(U);
+          continue;
+        }
+      }
+
+      // If we hit a load/store with invariant.group metadata and the same
+      // pointer operand, we can assume that the value pointed to by the
+      // pointer operand didn't change.
+      if (U->hasMetadata(LLVMContext::MD_invariant_group) &&
+          getLoadStorePointerOperand(U) == Ptr) {
+        if (DT.dominates(U, MostDominatingInstruction))
+          MostDominatingInstruction = U;
+      }
+    }
+  }
+  return MostDominatingInstruction == &I ? nullptr : MostDominatingInstruction;
+}
+
 template <typename AliasAnalysisType>
 MemoryAccess *
 MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
-    MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) {
+    MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf,
+    bool UseInvariantGroup) {
   auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
   // If this is a MemoryPhi, we can't do anything.
   if (!StartingAccess)
     return MA;

+  if (UseInvariantGroup) {
+    if (auto *I = getInvariantGroupClobberingInstruction(
+            *StartingAccess->getMemoryInst(), MSSA->getDomTree())) {
+      assert(isa<LoadInst>(I) || isa<StoreInst>(I));
+
+      auto *ClobberMA = MSSA->getMemoryAccess(I);
+      assert(ClobberMA);
+      if (auto *LI = dyn_cast<LoadInst>(I)) {
+        auto *ClobberDA = ClobberMA->getDefiningAccess();
+        assert(ClobberDA);
+        return ClobberDA;
+      }
+      return ClobberMA;
+    }
+  }
+
   bool IsOptimized = false;

   // If this is an already optimized use or def, return the optimized result.
diff --git a/llvm/test/Analysis/MemorySSA/invariant-groups.ll b/llvm/test/Analysis/MemorySSA/invariant-groups.ll
--- a/llvm/test/Analysis/MemorySSA/invariant-groups.ll
+++ b/llvm/test/Analysis/MemorySSA/invariant-groups.ll
@@ -1,11 +1,23 @@
 ; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>' -verify-memoryssa < %s 2>&1 | FileCheck %s
-;
-; Currently, MemorySSA doesn't support invariant groups. So, we should ignore
-; launder.invariant.group intrinsics entirely. We'll need to pay attention to
-; them when/if we decide to support invariant groups.

 @g = external global i32

+define i32 @global() {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+  store i32 0, i32* @g, align 4, !invariant.group !0
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobber
+  call void @clobber(i32* @g)
+
+; Don't optimize globals.
+; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK-NEXT: %1 = load i32
+  %1 = load i32, i32* @g, align 4, !invariant.group !0
+  ret i32 %1
+}
+
 define i32 @foo(i32* %a) {
 ; CHECK: 1 = MemoryDef(liveOnEntry)
 ; CHECK-NEXT: store i32 0
@@ -67,8 +79,7 @@
 ; CHECK-NEXT: store i32 1
   store i32 1, i32* @g, align 4

-; FIXME: based on invariant.group it should be MemoryUse(liveOnEntry)
-; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK: MemoryUse(2) {{.*}} clobbered by liveOnEntry
 ; CHECK-NEXT: %v3 = load i32
   %v3 = load i32, i32* %a32, align 4, !invariant.group !0
   %add = add nsw i32 %v2, %v3
@@ -98,8 +109,7 @@
 ; CHECK-NEXT: store i32 2
   store i32 2, i32* @g, align 4

-; FIXME: This can be changed to MemoryUse(2)
-; CHECK: MemoryUse(4) {{.*}} clobbered by 4
+; CHECK: MemoryUse(4) {{.*}} clobbered by 2
 ; CHECK-NEXT: %3 = load i32
   %3 = load i32, i32* %a32, align 4, !invariant.group !0
   %add = add nsw i32 %2, %3
@@ -118,15 +128,13 @@
   br i1 %a, label %Loop.Body, label %Loop.End

 Loop.Body:
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK: MemoryUse(2) {{.*}} clobbered by 1
 ; CHECK-NEXT: %1 = load i32
   %1 = load i32, i32* %0, !invariant.group !0
   br i1 %a, label %Loop.End, label %Loop.Body

 Loop.End:
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK: MemoryUse(2) {{.*}} clobbered by 1
 ; CHECK-NEXT: %2 = load
   %2 = load i32, i32* %0, align 4, !invariant.group !0
   br i1 %a, label %Ret, label %Loop.Body
@@ -154,8 +162,7 @@
 ; CHECK-NEXT: %0 = load i8
   %0 = load i8, i8* %after, !invariant.group !0

-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(6) {{.*}} clobbered by 6
+; CHECK: MemoryUse(6) {{.*}} clobbered by 1
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %p, !invariant.group !0

@@ -169,8 +176,7 @@
 ; CHECK-NEXT: %2 = load
   %2 = load i8, i8* %after, align 4, !invariant.group !0

-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(5) {{.*}} clobbered by 5
+; CHECK: MemoryUse(5) {{.*}} clobbered by 1
 ; CHECK-NEXT: %3 = load
   %3 = load i8, i8* %p, align 4, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
@@ -203,8 +209,7 @@
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)

-; FIXME: MemoryUse(8)
-; CHECK: MemoryUse(4) {{.*}} clobbered by 4
+; CHECK: MemoryUse(4) {{.*}} clobbered by 8
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %after, !invariant.group !0

@@ -214,8 +219,7 @@
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)

-; FIXME: MemoryUse(8)
-; CHECK: MemoryUse(5) {{.*}} clobbered by 5
+; CHECK: MemoryUse(5) {{.*}} clobbered by 8
 ; CHECK-NEXT: %2 = load i8
   %2 = load i8, i8* %after, !invariant.group !0

@@ -230,8 +234,7 @@
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)

-; FIXME: MemoryUse(7)
-; CHECK: MemoryUse(6) {{.*}} clobbered by 6
+; CHECK: MemoryUse(6) {{.*}} clobbered by 7
 ; CHECK-NEXT: %4 = load
   %4 = load i8, i8* %after, align 4, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
@@ -263,8 +266,7 @@
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %after, !invariant.group !0

-; FIXME: MemoryUse(2)
-; CHECK: MemoryUse(6) {{.*}} clobbered by 6
+; CHECK: MemoryUse(6) {{.*}} clobbered by 1
 ; CHECK-NEXT: %2 = load i8
   %2 = load i8, i8* %p, !invariant.group !0

@@ -277,8 +279,7 @@
 ; CHECK-NEXT: %3 = load
   %3 = load i8, i8* %after, align 4, !invariant.group !0

-; FIXME: MemoryUse(2)
-; CHECK: MemoryUse(5) {{.*}} clobbered by 5
+; CHECK: MemoryUse(5) {{.*}} clobbered by 1
 ; CHECK-NEXT: %4 = load
   %4 = load i8, i8* %p, align 4, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
diff --git a/llvm/test/Transforms/NewGVN/invariant.group-xfail.ll b/llvm/test/Transforms/NewGVN/invariant.group.ll
rename from llvm/test/Transforms/NewGVN/invariant.group-xfail.ll
rename to llvm/test/Transforms/NewGVN/invariant.group.ll
--- a/llvm/test/Transforms/NewGVN/invariant.group-xfail.ll
+++ b/llvm/test/Transforms/NewGVN/invariant.group.ll
@@ -1,5 +1,4 @@
-; XFAIL: *
-; RUN: opt < %s -newgvn -S | FileCheck %s
+; RUN: opt < %s -passes=newgvn -S | FileCheck %s

 %struct.A = type { i32 (...)** }
 @_ZTV1A = available_externally unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)], align 8
@@ -54,9 +53,6 @@

 ; CHECK-LABEL: define i1 @proveEqualityForStrip(
 define i1 @proveEqualityForStrip(i8* %a) {
-; FIXME: The first call could be also removed by GVN. Right now
-; DCE removes it. The second call is CSE'd with the first one.
-; CHECK: %b1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
   %b1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
 ; CHECK-NOT: llvm.strip.invariant.group
   %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
@@ -96,7 +92,7 @@
   %3 = load %struct.A*, %struct.A** %a, align 8
   %4 = bitcast %struct.A* %3 to void (%struct.A*)***

-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !0
   %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0
   %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8
@@ -104,7 +100,7 @@
   %6 = load %struct.A*, %struct.A** %a, align 8
   %7 = bitcast %struct.A* %6 to void (%struct.A*)***

-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   %vtable2 = load void (%struct.A*)**, void (%struct.A*)*** %7, align 8, !invariant.group !0
   %vfn3 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable2, i64 0
   %8 = load void (%struct.A*)*, void (%struct.A*)** %vfn3, align 8
@@ -116,14 +112,14 @@
   %vtable4 = load void (%struct.A*)**, void (%struct.A*)*** %10, align 8, !invariant.group !0
   %vfn5 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable4, i64 0
   %11 = load void (%struct.A*)*, void (%struct.A*)** %vfn5, align 8
-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   call void %11(%struct.A* %9)

   %vtable5 = load i8**, i8*** %2, align 8, !invariant.group !0
   %vfn6 = getelementptr inbounds i8*, i8** %vtable5, i64 0
   %12 = bitcast i8** %vfn6 to void (%struct.A*)**
   %13 = load void (%struct.A*)*, void (%struct.A*)** %12, align 8
-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   call void %13(%struct.A* %9)

   ret void
@@ -145,7 +141,7 @@
   %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2)
   store %struct.A* %1, %struct.A** %a, align 8

-; CHECK-NOT: !invariant.group
+; FIXME-NOT: !invariant.group
   %3 = load %struct.A*, %struct.A** %a, align 8
   %4 = bitcast %struct.A* %3 to void (%struct.A*)***

@@ -163,7 +159,7 @@
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[A:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[A:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %a = load i8, i8* %ptr, !invariant.group !0
 ; CHECK-NOT: load
   %b = load i8, i8* %ptr, !invariant.group !0
@@ -180,7 +176,7 @@
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[D:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[D:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %c = load i8, i8* %ptr
 ; CHECK-NOT: load
   %d = load i8, i8* %ptr, !invariant.group !0
@@ -197,7 +193,7 @@
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[E:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %e = load i8, i8* %ptr, !invariant.group !0
 ; CHECK-NOT: load
   %f = load i8, i8* %ptr
@@ -214,7 +210,7 @@
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[E:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %e = load i8, i8* %ptr, !invariant.group !0
 ; CHECK-NOT: load
   %f = load i8, i8* %ptr, !invariant.group !0
@@ -257,10 +253,10 @@
   %ptr = alloca i8
   store i8 42, i8* %ptr, !invariant.group !0
   %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr)
-; CHECK-NOT: load
+; FIXME-NOT: load
   %a = load i8, i8* %ptr2, !invariant.group !0

-; CHECK: ret i8 42
+; FIXME: ret i8 42
   ret i8 %a
 }

@@ -320,13 +316,13 @@
   %unknownValue = load i8, i8* @unknownPtr

 ; FIXME: Can assume that %unknownValue == 42
-; CHECK: store i8 %unknownValue, i8* %ptr, !invariant.group !0
+; CHECK: store i8 %unknownValue, i8* %ptr, align 1, !invariant.group !0
   store i8 %unknownValue, i8* %ptr, !invariant.group !0

   %newPtr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr)
-; CHECK-NOT: load
+; FIXME-NOT: load
   %d = load i8, i8* %newPtr2, !invariant.group !0
-; CHECK: ret i8 %unknownValue
+; FIXME: ret i8 %unknownValue
   ret i8 %d
 }

@@ -347,7 +343,7 @@
   %6 = bitcast %struct.A* %a to void (%struct.A*)***
   %7 = load void (%struct.A*)**, void (%struct.A*)*** %6, align 8, !invariant.group !0
   %8 = load void (%struct.A*)*, void (%struct.A*)** %7, align 8
-; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
+; FIXME: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
   call void %8(%struct.A* nonnull %a)
   br label %_Z1gR1A.exit

@@ -360,10 +356,10 @@
 ; from the same function.
 ; CHECK-LABEL: define void @testGlobal() {
 define void @testGlobal() {
-; CHECK: %a = load i8, i8* @unknownPtr, !invariant.group !0
+; CHECK: %a = load i8, i8* @unknownPtr, align 1, !invariant.group !0
   %a = load i8, i8* @unknownPtr, !invariant.group !0
   call void @foo2(i8* @unknownPtr, i8 %a)
-; CHECK: %1 = load i8, i8* @unknownPtr, !invariant.group !0
+; CHECK: %1 = load i8, i8* @unknownPtr, align 1, !invariant.group !0
   %1 = load i8, i8* @unknownPtr, !invariant.group !0
   call void @bar(i8 %1)

@@ -383,7 +379,7 @@
 define void @testNotGlobal() {
   %a = alloca i8
   call void @foo(i8* %a)
-; CHECK: %b = load i8, i8* %a, !invariant.group !0
+; CHECK: %b = load i8, i8* %a, align 1, !invariant.group !0
   %b = load i8, i8* %a, !invariant.group !0
   call void @foo2(i8* %a, i8 %b)

@@ -393,12 +389,12 @@

   %b0 = bitcast i8* %a to i1*
   call void @fooBit(i1* %b0, i1 1)
-; CHECK: %1 = trunc i8 %b to i1
+; FIXME: %1 = trunc i8 %b to i1
   %2 = load i1, i1* %b0, !invariant.group !0
-; CHECK-NEXT: call void @fooBit(i1* %b0, i1 %1)
+; FIXME-NEXT: call void @fooBit(i1* %b0, i1 %1)
   call void @fooBit(i1* %b0, i1 %2)
   %3 = load i1, i1* %b0, !invariant.group !0
-; CHECK-NEXT: call void @fooBit(i1* %b0, i1 %1)
+; FIXME-NEXT: call void @fooBit(i1* %b0, i1 %1)
   call void @fooBit(i1* %b0, i1 %3)
   ret void
 }
@@ -426,9 +422,9 @@
   %8 = phi i8 [ %10, %._crit_edge ], [ 1, %._crit_edge.preheader ]
   %.pre = load void (%struct.A*)**, void (%struct.A*)*** %5, align 8, !invariant.group !0
   %9 = load void (%struct.A*)*, void (%struct.A*)** %.pre, align 8
-  ; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
+  ; FIXME: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
   call void %9(%struct.A* nonnull %a) #3
-  ; CHECK-NOT: call void %
+  ; FIXME-NOT: call void %
   %10 = add nuw nsw i8 %8, 1
   %11 = load i8, i8* @unknownPtr, align 4
   %12 = icmp slt i8 %10, %11
@@ -451,10 +447,11 @@
 declare void @fooBit(i1*, i1)

 declare i8* @llvm.launder.invariant.group.p0i8(i8*)
+declare i8* @llvm.strip.invariant.group.p0i8(i8*)

 ; Function Attrs: nounwind
 declare void @llvm.assume(i1 %cmp.vtables) #0

 attributes #0 = { nounwind }
-!0 = !{}
\ No newline at end of file
+!0 = !{}
diff --git a/llvm/unittests/Analysis/MemorySSATest.cpp b/llvm/unittests/Analysis/MemorySSATest.cpp
--- a/llvm/unittests/Analysis/MemorySSATest.cpp
+++ b/llvm/unittests/Analysis/MemorySSATest.cpp
@@ -1725,4 +1725,50 @@
       EXPECT_TRUE(ItB->second.Size.getValue() == 8);
     }
   }
-}
\ No newline at end of file
+}
+
+TEST_F(MemorySSATest, TestInvariantGroup) {
+  SMDiagnostic E;
+  auto M = parseAssemblyString("declare void @f(i8*)\n"
+                               "define i8 @test(i8* %p) {\n"
+                               "entry:\n"
+                               "  store i8 42, i8* %p, !invariant.group !0\n"
+                               "  call void @f(i8* %p)\n"
+                               "  %v = load i8, i8* %p, !invariant.group !0\n"
+                               "  ret i8 %v\n"
+                               "}\n"
+                               "!0 = !{}",
+                               E, C);
+  ASSERT_TRUE(M);
+  F = M->getFunction("test");
+  ASSERT_TRUE(F);
+  setupAnalyses();
+  MemorySSA &MSSA = *Analyses->MSSA;
+  MemorySSAWalker *Walker = Analyses->Walker;
+
+  auto &BB = F->getEntryBlock();
+  auto &SI = cast<StoreInst>(*BB.begin());
+  auto &Call = cast<CallBase>(*std::next(BB.begin()));
+  auto &LI = cast<LoadInst>(*std::next(std::next(BB.begin())));
+
+  {
+    MemoryAccess *SAccess = MSSA.getMemoryAccess(&SI);
+    MemoryAccess *LAccess = MSSA.getMemoryAccess(&LI);
+    MemoryAccess *SClobber = Walker->getClobberingMemoryAccess(SAccess);
+    EXPECT_TRUE(MSSA.isLiveOnEntryDef(SClobber));
+    MemoryAccess *LClobber = Walker->getClobberingMemoryAccess(LAccess);
+    EXPECT_EQ(SAccess, LClobber);
+  }
+
+  // Remove the store and verify that the memory accesses still make sense.
+  MemorySSAUpdater Updater(&MSSA);
+  Updater.removeMemoryAccess(&SI);
+  SI.eraseFromParent();
+
+  {
+    MemoryAccess *CallAccess = MSSA.getMemoryAccess(&Call);
+    MemoryAccess *LAccess = MSSA.getMemoryAccess(&LI);
+    MemoryAccess *LClobber = Walker->getClobberingMemoryAccess(LAccess);
+    EXPECT_EQ(CallAccess, LClobber);
+  }
+}