diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -1039,7 +1039,8 @@
   // updated if a new clobber is found by this SkipSelf search. If this
   // additional query becomes heavily used we may decide to cache the result.
   // Walker instantiations will decide how to set the SkipSelf bool.
-  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool);
+  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool,
+                                              bool UseInvariantGroup = true);
 };
 
 /// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
@@ -1064,6 +1065,11 @@
                                           unsigned &UWL) {
     return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
   }
+  // This method is not accessible outside of this file.
+  MemoryAccess *getClobberingMemoryAccessWithoutInvariantGroup(MemoryAccess *MA,
+                                                               unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, UWL, false, false);
+  }
 
   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
     unsigned UpwardWalkLimit = MaxCheckLimit;
@@ -1460,10 +1466,13 @@
   unsigned UpwardWalkLimit = MaxCheckLimit;
   while (UpperBound > LocInfo.LowerBound) {
     if (isa<MemoryPhi>(VersionStack[UpperBound])) {
-      // For phis, use the walker, see where we ended up, go there
+      // For phis, use the walker, see where we ended up, go there.
+      // The invariant.group handling in MemorySSA is ad-hoc and doesn't
+      // support updates, so don't use it to optimize uses.
       MemoryAccess *Result =
-          Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit);
-      // We are guaranteed to find it or something is wrong
+          Walker->getClobberingMemoryAccessWithoutInvariantGroup(
+              MU, UpwardWalkLimit);
+      // We are guaranteed to find it or something is wrong.
       while (VersionStack[UpperBound] != Result) {
         assert(UpperBound != 0);
         --UpperBound;
@@ -2469,15 +2478,88 @@
   return Clobber;
 }
 
+static const Instruction *
+getInvariantGroupClobberingInstruction(Instruction &I, DominatorTree &DT) {
+  if (!I.hasMetadata(LLVMContext::MD_invariant_group) || I.isVolatile())
+    return nullptr;
+
+  // We consider bitcasts and zero GEPs to be the same pointer value. Start by
+  // stripping bitcasts and zero GEPs, then we will recursively look at loads
+  // and stores through bitcasts and zero GEPs.
+  Value *PointerOperand = getLoadStorePointerOperand(&I)->stripPointerCasts();
+
+  // It's not safe to walk the use list of a global value because function
+  // passes aren't allowed to look outside their functions.
+  // FIXME: this could be fixed by filtering instructions from outside of the
+  // current function.
+  if (isa<GlobalValue>(PointerOperand))
+    return nullptr;
+
+  // Queue to process all pointers that are equivalent to the load operand.
+  SmallVector<const Value *, 8> PointerUsesQueue;
+  PointerUsesQueue.push_back(PointerOperand);
+
+  const Instruction *MostDominatingInstruction = &I;
+
+  // FIXME: This loop is O(n^2) because dominates can be O(n) and in the worst
+  // case we will see all the instructions. It may not matter in practice. If
+  // it does, we will have to support MemorySSA construction and updates.
+  while (!PointerUsesQueue.empty()) {
+    const Value *Ptr = PointerUsesQueue.pop_back_val();
+    assert(Ptr && !isa<GlobalValue>(Ptr) &&
+           "Null or GlobalValue should not be inserted");
+
+    for (const User *Us : Ptr->users()) {
+      auto *U = dyn_cast<Instruction>(Us);
+      if (!U || U == &I || !DT.dominates(U, MostDominatingInstruction))
+        continue;
+
+      // Add bitcasts and zero GEPs to queue.
+      if (isa<BitCastInst>(U)) {
+        PointerUsesQueue.push_back(U);
+        continue;
+      }
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+        if (GEP->hasAllZeroIndices())
+          PointerUsesQueue.push_back(U);
+        continue;
+      }
+
+      // If we hit a load/store with invariant.group metadata and the same
+      // pointer operand, we can assume that the value pointed to by the
+      // pointer operand didn't change.
+      if (U->hasMetadata(LLVMContext::MD_invariant_group) &&
+          getLoadStorePointerOperand(U) == Ptr && !U->isVolatile()) {
+        MostDominatingInstruction = U;
+      }
+    }
+  }
+  return MostDominatingInstruction == &I ? nullptr : MostDominatingInstruction;
+}
+
 template <typename AliasAnalysisType>
 MemoryAccess *
 MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
-    MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) {
+    MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf,
+    bool UseInvariantGroup) {
   auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
   // If this is a MemoryPhi, we can't do anything.
   if (!StartingAccess)
     return MA;
 
+  if (UseInvariantGroup) {
+    if (auto *I = getInvariantGroupClobberingInstruction(
+            *StartingAccess->getMemoryInst(), MSSA->getDomTree())) {
+      assert(isa<LoadInst>(I) || isa<StoreInst>(I));
+
+      auto *ClobberMA = MSSA->getMemoryAccess(I);
+      assert(ClobberMA);
+      if (isa<MemoryUse>(ClobberMA))
+        return ClobberMA->getDefiningAccess();
+      return ClobberMA;
+    }
+  }
+
   bool IsOptimized = false;
 
   // If this is an already optimized use or def, return the optimized result.
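
Illustration (not part of the patch): with the walker change above, a clobber query for an !invariant.group access can skip over intervening MemoryDefs to the dominating !invariant.group store on the same pointer. A minimal sketch, with hypothetical names; @clobber stands in for any opaque call:

  declare void @clobber(i8*)

  define i8 @example(i8* %p) {
    store i8 42, i8* %p, !invariant.group !0  ; 1 = MemoryDef(liveOnEntry)
    call void @clobber(i8* %p)                ; 2 = MemoryDef(1)
    ; The load is still built as MemoryUse(2), but getClobberingMemoryAccess
    ; now returns the store (1): both accesses carry !invariant.group on %p
    ; and the store dominates the load, so the pointee cannot have changed.
    %v = load i8, i8* %p, !invariant.group !0
    ret i8 %v
  }

  !0 = !{}
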
diff --git a/llvm/test/Analysis/MemorySSA/invariant-groups.ll b/llvm/test/Analysis/MemorySSA/invariant-groups.ll
--- a/llvm/test/Analysis/MemorySSA/invariant-groups.ll
+++ b/llvm/test/Analysis/MemorySSA/invariant-groups.ll
@@ -1,11 +1,44 @@
 ; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>' -verify-memoryssa < %s 2>&1 | FileCheck %s
-;
-; Currently, MemorySSA doesn't support invariant groups. So, we should ignore
-; launder.invariant.group intrinsics entirely. We'll need to pay attention to
-; them when/if we decide to support invariant groups.
 
 @g = external global i32
 
+; CHECK-LABEL: define {{.*}} @global(
+define i32 @global() {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+  store i32 0, i32* @g, align 4, !invariant.group !0
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobber
+  call void @clobber(i32* @g)
+
+; FIXME: this could be clobbered by 1 if we walked the instruction list for loads/stores to @g.
+; But we can't look at the uses of @g in a function analysis.
+; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK-NEXT: %1 = load i32
+  %1 = load i32, i32* @g, align 4, !invariant.group !0
+  ret i32 %1
+}
+
+; CHECK-LABEL: define {{.*}} @global2(
+define i32 @global2() {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+  store i32 0, i32* inttoptr (i64 ptrtoint (i32* @g to i64) to i32*), align 4, !invariant.group !0
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobber
+  call void @clobber(i32* inttoptr (i64 ptrtoint (i32* @g to i64) to i32*))
+
+; FIXME: this could be clobbered by 1 if we walked the instruction list for loads/stores to @g.
+; But we can't look at the uses of @g in a function analysis.
+; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK-NEXT: %1 = load i32
+  %1 = load i32, i32* inttoptr (i64 ptrtoint (i32* @g to i64) to i32*), align 4, !invariant.group !0
+  ret i32 %1
+}
+
+; CHECK-LABEL: define {{.*}} @foo(
 define i32 @foo(i32* %a) {
 ; CHECK: 1 = MemoryDef(liveOnEntry)
 ; CHECK-NEXT: store i32 0
@@ -29,6 +62,41 @@
   ret i32 %2
 }
 
+; CHECK-LABEL: define {{.*}} @volatile1(
+define void @volatile1(i32* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+  store i32 0, i32* %a, align 4, !invariant.group !0
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobber
+  call void @clobber(i32* %a)
+
+; CHECK: 3 = MemoryDef(2){{.*}} clobbered by 2
+; CHECK-NEXT: load volatile
+  %b = load volatile i32, i32* %a, align 4, !invariant.group !0
+
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}} @volatile2(
+define void @volatile2(i32* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store volatile i32 0
+  store volatile i32 0, i32* %a, align 4, !invariant.group !0
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobber
+  call void @clobber(i32* %a)
+
+; CHECK: MemoryUse(2){{.*}} clobbered by 2
+; CHECK-NEXT: load i32
+  %b = load i32, i32* %a, align 4, !invariant.group !0
+
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}} @skipBarrier(
 define i32 @skipBarrier(i32* %a) {
 ; CHECK: 1 = MemoryDef(liveOnEntry)
 ; CHECK-NEXT: store i32 0
@@ -47,6 +115,7 @@
   ret i32 %2
 }
 
+; CHECK-LABEL: define {{.*}} @skipBarrier2(
 define i32 @skipBarrier2(i32* %a) {
 
 ; CHECK: MemoryUse(liveOnEntry)
@@ -67,8 +136,7 @@
 ; CHECK-NEXT: store i32 1
   store i32 1, i32* @g, align 4
 
-; FIXME: based on invariant.group it should be MemoryUse(liveOnEntry)
-; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK: MemoryUse(2) {{.*}} clobbered by liveOnEntry
 ; CHECK-NEXT: %v3 = load i32
   %v3 = load i32, i32* %a32, align 4, !invariant.group !0
   %add = add nsw i32 %v2, %v3
@@ -76,6 +144,7 @@
   ret i32 %add2
 }
 
+; CHECK-LABEL: define {{.*}} @handleInvariantGroups(
 define i32 @handleInvariantGroups(i32* %a) {
 ; CHECK: 1 = MemoryDef(liveOnEntry)
 ; CHECK-NEXT: store i32 0
@@ -98,14 +167,14 @@
 ; CHECK-NEXT: store i32 2
   store i32 2, i32* @g, align 4
 
-; FIXME: This can be changed to MemoryUse(2)
-; CHECK: MemoryUse(4) {{.*}} clobbered by 4
+; CHECK: MemoryUse(4) {{.*}} clobbered by 2
 ; CHECK-NEXT: %3 = load i32
   %3 = load i32, i32* %a32, align 4, !invariant.group !0
   %add = add nsw i32 %2, %3
   ret i32 %add
 }
 
+; CHECK-LABEL: define {{.*}} @loop(
 define i32 @loop(i1 %a) {
 entry:
   %0 = alloca i32, align 4
@@ -118,15 +187,13 @@
   br i1 %a, label %Loop.Body, label %Loop.End
 
 Loop.Body:
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK: MemoryUse(2) {{.*}} clobbered by 1
 ; CHECK-NEXT: %1 = load i32
   %1 = load i32, i32* %0, !invariant.group !0
   br i1 %a, label %Loop.End, label %Loop.Body
 
 Loop.End:
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK: MemoryUse(2) {{.*}} clobbered by 1
 ; CHECK-NEXT: %2 = load
   %2 = load i32, i32* %0, align 4, !invariant.group !0
   br i1 %a, label %Ret, label %Loop.Body
@@ -135,6 +202,7 @@
   ret i32 %2
 }
 
+; CHECK-LABEL: define {{.*}} @loop2(
 define i8 @loop2(i8* %p) {
 entry:
 ; CHECK: 1 = MemoryDef(liveOnEntry)
@@ -154,8 +222,7 @@
 ; CHECK-NEXT: %0 = load i8
   %0 = load i8, i8* %after, !invariant.group !0
 
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(6) {{.*}} clobbered by 6
+; CHECK: MemoryUse(6) {{.*}} clobbered by 1
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %p, !invariant.group !0
 
@@ -169,8 +236,7 @@
 ; CHECK-NEXT: %2 = load
   %2 = load i8, i8* %after, align 4, !invariant.group !0
 
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(5) {{.*}} clobbered by 5
+; CHECK: MemoryUse(5) {{.*}} clobbered by 1
 ; CHECK-NEXT: %3 = load
   %3 = load i8, i8* %p, align 4, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
@@ -180,6 +246,7 @@
 }
 
 
+; CHECK-LABEL: define {{.*}} @loop3(
 define i8 @loop3(i8* %p) {
 entry:
 ; CHECK: 1 = MemoryDef(liveOnEntry)
@@ -203,8 +270,7 @@
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)
 
-; FIXME: MemoryUse(8)
-; CHECK: MemoryUse(4) {{.*}} clobbered by 4
+; CHECK: MemoryUse(4) {{.*}} clobbered by 8
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %after, !invariant.group !0
 
@@ -214,8 +280,7 @@
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)
 
-; FIXME: MemoryUse(8)
-; CHECK: MemoryUse(5) {{.*}} clobbered by 5
+; CHECK: MemoryUse(5) {{.*}} clobbered by 8
 ; CHECK-NEXT: %2 = load i8
   %2 = load i8, i8* %after, !invariant.group !0
 
@@ -230,8 +295,7 @@
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)
 
-; FIXME: MemoryUse(7)
-; CHECK: MemoryUse(6) {{.*}} clobbered by 6
+; CHECK: MemoryUse(6) {{.*}} clobbered by 7
 ; CHECK-NEXT: %4 = load
   %4 = load i8, i8* %after, align 4, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
@@ -240,6 +304,7 @@
   ret i8 %3
 }
 
+; CHECK-LABEL: define {{.*}} @loop4(
 define i8 @loop4(i8* %p) {
 entry:
 ; CHECK: 1 = MemoryDef(liveOnEntry)
@@ -263,8 +328,7 @@
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %after, !invariant.group !0
 
-; FIXME: MemoryUse(2)
-; CHECK: MemoryUse(6) {{.*}} clobbered by 6
+; CHECK: MemoryUse(6) {{.*}} clobbered by 1
 ; CHECK-NEXT: %2 = load i8
   %2 = load i8, i8* %p, !invariant.group !0
 
@@ -277,8 +341,7 @@
 ; CHECK-NEXT: %3 = load
   %3 = load i8, i8* %after, align 4, !invariant.group !0
 
-; FIXME: MemoryUse(2)
-; CHECK: MemoryUse(5) {{.*}} clobbered by 5
+; CHECK: MemoryUse(5) {{.*}} clobbered by 1
 ; CHECK-NEXT: %4 = load
   %4 = load i8, i8* %p, align 4, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
@@ -288,7 +351,7 @@
 }
 
 ; In the future we would like to CSE barriers if there is no clobber between.
-; CHECK-LABEL: define i8 @optimizable()
+; CHECK-LABEL: define {{.*}} @optimizable(
 define i8 @optimizable() {
 entry:
   %ptr = alloca i8
@@ -318,7 +381,7 @@
   ret i8 %v
 }
 
-; CHECK-LABEL: define i8 @unoptimizable2()
+; CHECK-LABEL: define {{.*}} @unoptimizable2()
 define i8 @unoptimizable2() {
   %ptr = alloca i8
 ; CHECK: 1 = MemoryDef(liveOnEntry)
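
Aside (illustration, not part of the patch): several of the tests above depend on the pointer-equivalence walk in getInvariantGroupClobberingInstruction, which treats bitcasts and all-zero GEPs as the same pointer value. A sketch of the pattern with a hypothetical @gep0_example, reusing this file's @clobber declaration:

  define i32 @gep0_example(i32* %p) {
    store i32 7, i32* %p, !invariant.group !0
    %q = getelementptr i32, i32* %p, i64 0   ; all-zero GEP: same pointer value
    call void @clobber(i32* %p)
    ; %q strips back to %p, whose users are then scanned for a dominating
    ; !invariant.group access, so the store is reported as the clobber.
    %v = load i32, i32* %q, !invariant.group !0
    ret i32 %v
  }
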
-; CHECK: %b1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
   %b1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
 ; CHECK-NOT: llvm.strip.invariant.group
   %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
@@ -76,6 +72,7 @@
   ret i8 %a
 }
 
+; NewGVN doesn't support assumes.
 ; CHECK-LABEL: define void @indirectLoads() {
 define void @indirectLoads() {
 entry:
@@ -96,7 +93,7 @@
   %3 = load %struct.A*, %struct.A** %a, align 8
   %4 = bitcast %struct.A* %3 to void (%struct.A*)***
 
-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !0
   %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0
   %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8
@@ -104,7 +101,7 @@
   %6 = load %struct.A*, %struct.A** %a, align 8
   %7 = bitcast %struct.A* %6 to void (%struct.A*)***
 
-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   %vtable2 = load void (%struct.A*)**, void (%struct.A*)*** %7, align 8, !invariant.group !0
   %vfn3 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable2, i64 0
   %8 = load void (%struct.A*)*, void (%struct.A*)** %vfn3, align 8
@@ -116,19 +113,20 @@
   %vtable4 = load void (%struct.A*)**, void (%struct.A*)*** %10, align 8, !invariant.group !0
   %vfn5 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable4, i64 0
   %11 = load void (%struct.A*)*, void (%struct.A*)** %vfn5, align 8
-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   call void %11(%struct.A* %9)
 
   %vtable5 = load i8**, i8*** %2, align 8, !invariant.group !0
   %vfn6 = getelementptr inbounds i8*, i8** %vtable5, i64 0
   %12 = bitcast i8** %vfn6 to void (%struct.A*)**
   %13 = load void (%struct.A*)*, void (%struct.A*)** %12, align 8
-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   call void %13(%struct.A* %9)
 
   ret void
 }
 
+; NewGVN won't CSE loads with different pointee types.
 ; CHECK-LABEL: define void @combiningBitCastWithLoad() {
 define void @combiningBitCastWithLoad() {
 entry:
@@ -145,7 +143,7 @@
   %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2)
 
   store %struct.A* %1, %struct.A** %a, align 8
-; CHECK-NOT: !invariant.group
+; FIXME-NOT: !invariant.group
   %3 = load %struct.A*, %struct.A** %a, align 8
   %4 = bitcast %struct.A* %3 to void (%struct.A*)***
 
@@ -163,7 +161,7 @@
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[A:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[A:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %a = load i8, i8* %ptr, !invariant.group !0
 ; CHECK-NOT: load
   %b = load i8, i8* %ptr, !invariant.group !0
@@ -180,7 +178,7 @@
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[D:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[D:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %c = load i8, i8* %ptr
 ; CHECK-NOT: load
   %d = load i8, i8* %ptr, !invariant.group !0
@@ -197,7 +195,7 @@
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[E:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %e = load i8, i8* %ptr, !invariant.group !0
 ; CHECK-NOT: load
   %f = load i8, i8* %ptr
@@ -214,7 +212,7 @@
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[E:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %e = load i8, i8* %ptr, !invariant.group !0
 ; CHECK-NOT: load
   %f = load i8, i8* %ptr, !invariant.group !0
@@ -251,16 +249,17 @@
   ret i8 %a
 }
 
+; NewGVN cares about the launder for some reason.
 ; CHECK-LABEL: define i8 @optimizable4() {
 define i8 @optimizable4() {
 entry:
   %ptr = alloca i8
-  store i8 42, i8* %ptr, !invariant.group !0
+  store i8 42, i8* %ptr
   %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr)
-; CHECK-NOT: load
-  %a = load i8, i8* %ptr2, !invariant.group !0
+; FIXME-NOT: load
+  %a = load i8, i8* %ptr2
 
-; CHECK: ret i8 42
+; FIXME: ret i8 42
   ret i8 %a
 }
 
@@ -276,7 +275,7 @@
   call void @bar(i8 %b)
 
   %c = load volatile i8, i8* %ptr, !invariant.group !0
-; FIXME: we could change %c to 42, preserving volatile load
+; We might be able to optimize this, but nobody cares
 ; CHECK: call void @bar(i8 %c)
   call void @bar(i8 %c)
 ; CHECK: ret i8 42
   ret i8 %a
 }
 
@@ -295,15 +294,15 @@
   call void @bar(i8 %b)
 
   %c = load volatile i8, i8* %ptr, !invariant.group !0
-; FIXME: we could change %c to 42, preserving volatile load
+; We might be able to optimize this, but nobody cares
 ; CHECK: call void @bar(i8 %c)
   call void @bar(i8 %c)
 ; CHECK: ret i8 42
   ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @fun() {
-define i8 @fun() {
+; CHECK-LABEL: define void @fun() {
+define void @fun() {
 entry:
   %ptr = alloca i8
   store i8 42, i8* %ptr, !invariant.group !0
@@ -313,23 +312,10 @@
 ; CHECK: call void @bar(i8 42)
   call void @bar(i8 %a)
 
-  %newPtr = call i8* @getPointer(i8* %ptr)
-  %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr
-; CHECK: call void @bar(i8 %c)
-  call void @bar(i8 %c)
-
-  %unknownValue = load i8, i8* @unknownPtr
-; FIXME: Can assume that %unknownValue == 42
-; CHECK: store i8 %unknownValue, i8* %ptr, !invariant.group !0
-  store i8 %unknownValue, i8* %ptr, !invariant.group !0
-
-  %newPtr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr)
-; CHECK-NOT: load
-  %d = load i8, i8* %newPtr2, !invariant.group !0
-; CHECK: ret i8 %unknownValue
-  ret i8 %d
+  ret void
 }
 
+; FIXME: NewGVN doesn't run instsimplify on a load from a vtable definition?
 ; This test checks if invariant.group understands gep with zeros
 ; CHECK-LABEL: define void @testGEP0() {
 define void @testGEP0() {
@@ -347,7 +333,7 @@
   %6 = bitcast %struct.A* %a to void (%struct.A*)***
   %7 = load void (%struct.A*)**, void (%struct.A*)*** %6, align 8, !invariant.group !0
   %8 = load void (%struct.A*)*, void (%struct.A*)** %7, align 8
-; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
+; FIXME: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
   call void %8(%struct.A* nonnull %a)
   br label %_Z1gR1A.exit
 
@@ -360,10 +346,10 @@
 ; from the same function.
 ; CHECK-LABEL: define void @testGlobal() {
 define void @testGlobal() {
-; CHECK: %a = load i8, i8* @unknownPtr, !invariant.group !0
+; CHECK: %a = load i8, i8* @unknownPtr, align 1, !invariant.group !0
   %a = load i8, i8* @unknownPtr, !invariant.group !0
   call void @foo2(i8* @unknownPtr, i8 %a)
-; CHECK: %1 = load i8, i8* @unknownPtr, !invariant.group !0
+; CHECK: %1 = load i8, i8* @unknownPtr, align 1, !invariant.group !0
   %1 = load i8, i8* @unknownPtr, !invariant.group !0
   call void @bar(i8 %1)
 
@@ -378,12 +364,14 @@
   call void @fooBit(i1* %b0, i1 %3)
   ret void
 }
-; And in the case it is not global
-; CHECK-LABEL: define void @testNotGlobal() {
-define void @testNotGlobal() {
+
+; Might be similar to above where NewGVN doesn't handle loads of different types from the same location.
+; Not super important anyway.
+; CHECK-LABEL: define void @testTrunc() {
+define void @testTrunc() {
   %a = alloca i8
   call void @foo(i8* %a)
-; CHECK: %b = load i8, i8* %a, !invariant.group !0
+; CHECK: %b = load i8, i8* %a, align 1, !invariant.group !0
   %b = load i8, i8* %a, !invariant.group !0
   call void @foo2(i8* %a, i8 %b)
 
@@ -393,16 +381,17 @@
 
   %b0 = bitcast i8* %a to i1*
   call void @fooBit(i1* %b0, i1 1)
-; CHECK: %1 = trunc i8 %b to i1
+; FIXME: %1 = trunc i8 %b to i1
   %2 = load i1, i1* %b0, !invariant.group !0
-; CHECK-NEXT: call void @fooBit(i1* %b0, i1 %1)
+; FIXME-NEXT: call void @fooBit(i1* %b0, i1 %1)
   call void @fooBit(i1* %b0, i1 %2)
   %3 = load i1, i1* %b0, !invariant.group !0
-; CHECK-NEXT: call void @fooBit(i1* %b0, i1 %1)
+; FIXME-NEXT: call void @fooBit(i1* %b0, i1 %1)
   call void @fooBit(i1* %b0, i1 %3)
   ret void
 }
 
+; See comment in @testGEP0 on what NewGVN is lacking.
 ; CHECK-LABEL: define void @handling_loops()
 define void @handling_loops() {
   %a = alloca %struct.A, align 8
@@ -426,9 +415,9 @@
   %8 = phi i8 [ %10, %._crit_edge ], [ 1, %._crit_edge.preheader ]
   %.pre = load void (%struct.A*)**, void (%struct.A*)*** %5, align 8, !invariant.group !0
   %9 = load void (%struct.A*)*, void (%struct.A*)** %.pre, align 8
-  ; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
+  ; FIXME: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
   call void %9(%struct.A* nonnull %a) #3
-  ; CHECK-NOT: call void %
+  ; FIXME-NOT: call void %
   %10 = add nuw nsw i8 %8, 1
   %11 = load i8, i8* @unknownPtr, align 4
   %12 = icmp slt i8 %10, %11
@@ -451,10 +440,11 @@
 declare void @fooBit(i1*, i1)
 
 declare i8* @llvm.launder.invariant.group.p0i8(i8*)
+declare i8* @llvm.strip.invariant.group.p0i8(i8*)
 
 ; Function Attrs: nounwind
 declare void @llvm.assume(i1 %cmp.vtables) #0
 
 attributes #0 = { nounwind }
-!0 = !{}
\ No newline at end of file
+!0 = !{}
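
Note (illustration, not part of the patch): with MemorySSA now answering invariant.group clobber queries, the un-XFAILed NewGVN test covers transforms of roughly this shape, where a stored value is forwarded across an opaque call. A sketch with a hypothetical @forward_over_call, using the test file's own @foo declaration:

  define i8 @forward_over_call() {
    %ptr = alloca i8
    store i8 42, i8* %ptr, !invariant.group !0
    call void @foo(i8* %ptr)
    %v = load i8, i8* %ptr, !invariant.group !0
    ; NewGVN can now replace %v with 42 and fold this to "ret i8 42".
    ret i8 %v
  }
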
diff --git a/llvm/unittests/Analysis/MemorySSATest.cpp b/llvm/unittests/Analysis/MemorySSATest.cpp
--- a/llvm/unittests/Analysis/MemorySSATest.cpp
+++ b/llvm/unittests/Analysis/MemorySSATest.cpp
@@ -1725,4 +1725,50 @@
     EXPECT_TRUE(ItB->second.Size.getValue() == 8);
   }
 }
-}
\ No newline at end of file
+}
+
+TEST_F(MemorySSATest, TestInvariantGroup) {
+  SMDiagnostic E;
+  auto M = parseAssemblyString("declare void @f(i8*)\n"
+                               "define i8 @test(i8* %p) {\n"
+                               "entry:\n"
+                               "  store i8 42, i8* %p, !invariant.group !0\n"
+                               "  call void @f(i8* %p)\n"
+                               "  %v = load i8, i8* %p, !invariant.group !0\n"
+                               "  ret i8 %v\n"
+                               "}\n"
+                               "!0 = !{}",
+                               E, C);
+  ASSERT_TRUE(M);
+  F = M->getFunction("test");
+  ASSERT_TRUE(F);
+  setupAnalyses();
+  MemorySSA &MSSA = *Analyses->MSSA;
+  MemorySSAWalker *Walker = Analyses->Walker;
+
+  auto &BB = F->getEntryBlock();
+  auto &SI = cast<StoreInst>(*BB.begin());
+  auto &Call = cast<CallInst>(*std::next(BB.begin()));
+  auto &LI = cast<LoadInst>(*std::next(std::next(BB.begin())));
+
+  {
+    MemoryAccess *SAccess = MSSA.getMemoryAccess(&SI);
+    MemoryAccess *LAccess = MSSA.getMemoryAccess(&LI);
+    MemoryAccess *SClobber = Walker->getClobberingMemoryAccess(SAccess);
+    EXPECT_TRUE(MSSA.isLiveOnEntryDef(SClobber));
+    MemoryAccess *LClobber = Walker->getClobberingMemoryAccess(LAccess);
+    EXPECT_EQ(SAccess, LClobber);
+  }
+
+  // Remove the store and verify that the memory accesses still make sense.
+  MemorySSAUpdater Updater(&MSSA);
+  Updater.removeMemoryAccess(&SI);
+  SI.eraseFromParent();
+
+  {
+    MemoryAccess *CallAccess = MSSA.getMemoryAccess(&Call);
+    MemoryAccess *LAccess = MSSA.getMemoryAccess(&LI);
+    MemoryAccess *LClobber = Walker->getClobberingMemoryAccess(LAccess);
+    EXPECT_EQ(CallAccess, LClobber);
+  }
+}
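
Closing note (illustration, not part of the patch): the volatile carve-out checked by @volatile1/@volatile2 in the MemorySSA test follows from the I.isVolatile() bail-outs in getInvariantGroupClobberingInstruction. A short sketch with a hypothetical @volatile_example, reusing that test file's @clobber declaration:

  define void @volatile_example(i32* %a) {
    store i32 0, i32* %a, !invariant.group !0
    call void @clobber(i32* %a)
    ; Volatile accesses never participate in the invariant.group walk, so the
    ; walker conservatively keeps the call as the clobber even though both
    ; accesses carry !invariant.group on the same pointer.
    %v = load volatile i32, i32* %a, !invariant.group !0
    ret void
  }
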