diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -934,6 +934,29 @@
     PerformedPhiTranslation = false;
   }
 
+  MemoryAccess *findInvariantGroupClobber(MemoryAccess *Start,
+                                          UpwardsMemoryQuery &Q) {
+    auto *LI = dyn_cast<LoadInst>(Query->Inst);
+
+    if (!LI || !LI->hasMetadata(LLVMContext::MD_invariant_group))
+      return nullptr;
+
+    auto *StrippedPointerOperand = LI->getPointerOperand()->stripPointerCasts();
+
+    MemoryAccess *Current = Start;
+    while (auto *M = dyn_cast_or_null<MemoryDef>(Current)) {
+      if (auto *SI = dyn_cast_or_null<StoreInst>(M->getMemoryInst())) {
+        if (SI->hasMetadata(LLVMContext::MD_invariant_group) &&
+            SI->getPointerOperand()->stripPointerCasts() ==
+                StrippedPointerOperand) {
+          return M;
+        }
+      }
+      Current = M->getDefiningAccess();
+    }
+    return nullptr;
+  }
+
 public:
   ClobberWalker(const MemorySSA &MSSA, AliasAnalysisType &AA, DominatorTree &DT)
       : MSSA(MSSA), AA(AA), DT(DT) {}
@@ -949,6 +972,9 @@
     if (!UpWalkLimit)
       UpWalkLimit++;
 
+    if (auto *M = findInvariantGroupClobber(Start, Q))
+      return M;
+
     MemoryAccess *Current = Start;
     // This walker pretends uses don't exist. If we're handed one, silently grab
     // its def. (This has the nice side-effect of ensuring we never cache uses)
@@ -1467,12 +1493,16 @@
       // We were last killed now by where we got to
       if (MSSA->isLiveOnEntryDef(VersionStack[UpperBound]))
         LocInfo.AR = None;
-      MU->setDefiningAccess(VersionStack[UpperBound], true, LocInfo.AR);
+      auto *LI = dyn_cast<LoadInst>(MU->getMemoryInst());
+      if (!LI || !LI->hasMetadata(LLVMContext::MD_invariant_group))
+        MU->setDefiningAccess(VersionStack[UpperBound], true, LocInfo.AR);
       LocInfo.LastKill = UpperBound;
     } else {
       // Otherwise, we checked all the new ones, and now we know we can get to
       // LastKill.
-      MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true, LocInfo.AR);
+      auto *LI = dyn_cast<LoadInst>(MU->getMemoryInst());
+      if (!LI || !LI->hasMetadata(LLVMContext::MD_invariant_group))
+        MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true, LocInfo.AR);
     }
     LocInfo.LowerBound = VersionStack.size() - 1;
     LocInfo.LowerBoundBlock = BB;
diff --git a/llvm/test/Analysis/MemorySSA/invariant-groups.ll b/llvm/test/Analysis/MemorySSA/invariant-groups.ll
--- a/llvm/test/Analysis/MemorySSA/invariant-groups.ll
+++ b/llvm/test/Analysis/MemorySSA/invariant-groups.ll
@@ -1,4 +1,3 @@
-; RUN: opt -basic-aa -print-memoryssa -verify-memoryssa -enable-new-pm=0 -analyze < %s 2>&1 | FileCheck %s
 ; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>' -verify-memoryssa < %s 2>&1 | FileCheck %s
 ;
 ; Currently, MemorySSA doesn't support invariant groups. So, we should ignore
diff --git a/llvm/test/Transforms/NewGVN/invariant.group.ll b/llvm/test/Transforms/NewGVN/invariant.group.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/invariant.group.ll
@@ -0,0 +1,142 @@
+; RUN: opt < %s -newgvn -S | FileCheck %s
+
+%struct.A = type { i32 (...)** }
+@_ZTV1A = available_externally unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)], align 8
+@_ZTI1A = external constant i8*
+
+@unknownPtr = external global i8
+
+; CHECK-LABEL: define i8 @simple() {
+define i8 @simple() {
+entry:
+  %ptr = alloca i8
+  store i8 42, i8* %ptr, !invariant.group !0
+  call void @foo(i8* %ptr)
+
+  %a = load i8, i8* %ptr, !invariant.group !0
+  %b = load i8, i8* %ptr, !invariant.group !0
+  %c = load i8, i8* %ptr, !invariant.group !0
+; CHECK: ret i8 42
+  ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @optimizable1() {
+define i8 @optimizable1() {
+entry:
+  %ptr = alloca i8
+  store i8 42, i8* %ptr, !invariant.group !0
+  %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr)
+  %a = load i8, i8* %ptr, !invariant.group !0
+
+  call void @foo(i8* %ptr2); call to use %ptr2
+; CHECK: ret i8 42
+  ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @optimizable2() {
+define i8 @optimizable2() {
+entry:
+  %ptr = alloca i8
+  store i8 42, i8* %ptr, !invariant.group !0
+  call void @foo(i8* %ptr)
+
+  store i8 13, i8* %ptr ; can't use this store with invariant.group
+  %a = load i8, i8* %ptr
+  call void @bar(i8 %a) ; call to use %a
+
+  call void @foo(i8* %ptr)
+  %b = load i8, i8* %ptr, !invariant.group !0
+
+; CHECK: ret i8 42
+  ret i8 %b
+}
+
+; CHECK-LABEL: define i1 @proveEqualityForStrip(
+define i1 @proveEqualityForStrip(i8* %a) {
+  %b1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
+; CHECK-NOT: llvm.strip.invariant.group
+  %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
+  %r = icmp eq i8* %b1, %b2
+; CHECK: ret i1 true
+  ret i1 %r
+}
+
+; CHECK-LABEL: define i8 @unoptimizable1() {
+define i8 @unoptimizable1() {
+entry:
+  %ptr = alloca i8
+  store i8 42, i8* %ptr
+  call void @foo(i8* %ptr)
+  %a = load i8, i8* %ptr, !invariant.group !0
+; CHECK: ret i8 %a
+  ret i8 %a
+}
+
+; CHECK-LABEL: define void @indirectLoads() {
+define void @indirectLoads() {
+entry:
+  %a = alloca %struct.A*, align 8
+  %0 = bitcast %struct.A** %a to i8*
+
+  %call = call i8* @getPointer()
+  %1 = bitcast i8* %call to %struct.A*
+  call void @_ZN1AC1Ev(%struct.A* %1)
+  %2 = bitcast %struct.A* %1 to i8***
+
+; CHECK: %vtable = load {{.*}} !invariant.group
+  %vtable = load i8**, i8*** %2, align 8, !invariant.group !0
+  %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2)
+  call void @llvm.assume(i1 %cmp.vtables)
+
+  store %struct.A* %1, %struct.A** %a, align 8
+  %3 = load %struct.A*, %struct.A** %a, align 8
+  %4 = bitcast %struct.A* %3 to void (%struct.A*)***
+
+; CHECK: call void @_ZN1A3fooEv(
+  %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !0
+  %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0
+  %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8
+  call void %5(%struct.A* %3)
+  %6 = load %struct.A*, %struct.A** %a, align 8
+  %7 = bitcast %struct.A* %6 to void (%struct.A*)***
+
+; CHECK: call void @_ZN1A3fooEv(
+  %vtable2 = load void (%struct.A*)**, void (%struct.A*)*** %7, align 8, !invariant.group !0
+  %vfn3 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable2, i64 0
+  %8 = load void (%struct.A*)*, void (%struct.A*)** %vfn3, align 8
+
+  call void %8(%struct.A* %6)
+  %9 = load %struct.A*, %struct.A** %a, align 8
+  %10 = bitcast %struct.A* %9 to void (%struct.A*)***
+
+  %vtable4 = load void (%struct.A*)**, void (%struct.A*)*** %10, align 8, !invariant.group !0
+  %vfn5 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable4, i64 0
+  %11 = load void (%struct.A*)*, void (%struct.A*)** %vfn5, align 8
+; CHECK: call void @_ZN1A3fooEv(
+  call void %11(%struct.A* %9)
+
+  %vtable5 = load i8**, i8*** %2, align 8, !invariant.group !0
+  %vfn6 = getelementptr inbounds i8*, i8** %vtable5, i64 0
+  %12 = bitcast i8** %vfn6 to void (%struct.A*)**
+  %13 = load void (%struct.A*)*, void (%struct.A*)** %12, align 8
+; CHECK: call void @_ZN1A3fooEv(
+  call void %13(%struct.A* %9)
+
+  ret void
+}
+
+declare void @foo(i8*)
+declare void @foo2(i8*, i8)
+declare void @bar(i8)
+declare i8* @getPointer()
+declare void @_ZN1A3fooEv(%struct.A*)
+declare void @_ZN1AC1Ev(%struct.A*)
+declare void @fooBit(i1*, i1)
+
+declare i8* @llvm.launder.invariant.group.p0i8(i8*)
+declare i8* @llvm.strip.invariant.group.p0i8(i8*)
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1 %cmp.vtables) #0
+
+!0 = !{}