Index: include/llvm/Analysis/GlobalsModRef.h
===================================================================
--- include/llvm/Analysis/GlobalsModRef.h
+++ include/llvm/Analysis/GlobalsModRef.h
@@ -50,6 +50,11 @@
   /// For each function, keep track of what globals are modified or read.
   DenseMap<const Function *, FunctionInfo> FunctionInfos;
 
+  /// A map of functions to SCC. The SCCs are described by a simple integer
+  /// ID that is only useful for comparing for equality (are two functions
+  /// in the same SCC or not?)
+  DenseMap<const Function *, unsigned> FunctionToSCCMap;
+
   /// Handle to clear this analysis on deletion of values.
   struct DeletionCallbackHandle final : CallbackVH {
     GlobalsAAResult *GAR;
@@ -103,8 +108,11 @@
                             SmallPtrSetImpl<Function *> *Writers = nullptr,
                             GlobalValue *OkayStoreDest = nullptr);
   bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
-
+  void CollectSCCMembership(CallGraph &CG);
+  bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V);
+  ModRefInfo getModRefInfoForArgument(ImmutableCallSite CS,
+                                      const GlobalValue *GV);
 };
 
 /// Analysis pass providing a never-invalidated alias analysis result.
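
A note on FunctionToSCCMap: the SCC IDs are opaque, so the only meaningful
operation on them is an equality test between two functions. A minimal sketch
of the intended query pattern, assuming a hypothetical helper name
(mayMutuallyRecurse is illustrative only, not part of the patch):

  // Hypothetical helper showing the FunctionToSCCMap contract: equal IDs
  // mean the two functions share a call-graph cycle, so a call from one
  // can re-enter the other while the original activation is still live.
  bool mayMutuallyRecurse(const Function *A, const Function *B) const {
    return FunctionToSCCMap.lookup(A) == FunctionToSCCMap.lookup(B);
  }
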
Index: lib/Analysis/GlobalsModRef.cpp
===================================================================
--- lib/Analysis/GlobalsModRef.cpp
+++ lib/Analysis/GlobalsModRef.cpp
@@ -358,6 +358,21 @@
       if (isFreeCall(I, &TLI)) {
         if (Writers)
           Writers->insert(CS->getParent()->getParent());
+      } else if (CS.doesNotCapture(CS.getArgumentNo(&U))) {
+        Function *ParentF = CS->getParent()->getParent();
+        // A nocapture argument may be read from or written to, but does not
+        // escape unless the call can somehow recurse.
+        //
+        // nocapture "indicates that the callee does not make any copies of
+        // the pointer that outlive itself". Therefore if we directly or
+        // indirectly recurse, we must treat the pointer as escaping.
+        if (FunctionToSCCMap[ParentF] ==
+            FunctionToSCCMap[CS.getCalledFunction()])
+          return true;
+        if (Readers)
+          Readers->insert(ParentF);
+        if (Writers)
+          Writers->insert(ParentF);
       } else {
         return true; // Argument of an unknown call.
       }
@@ -439,6 +454,21 @@
   return true;
 }
 
+void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
+  // We do a bottom-up SCC traversal of the call graph. In other words, we
+  // visit all callees before callers (leaf-first).
+  unsigned SCCID = 0;
+  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+    const std::vector<CallGraphNode *> &SCC = *I;
+    assert(!SCC.empty() && "SCC with no functions?");
+
+    for (auto *CGN : SCC)
+      if (Function *F = CGN->getFunction())
+        FunctionToSCCMap[F] = SCCID;
+    ++SCCID;
+  }
+}
+
 /// AnalyzeCallGraph - At this point, we know the functions where globals are
 /// immediately stored to and read from. Propagate this information up the call
 /// graph to all callers and compute the mod/ref info for all memory for each
@@ -765,6 +795,32 @@
   return AAResultBase::alias(LocA, LocB);
 }
 
+ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
+                                                     const GlobalValue *GV) {
+  if (CS.doesNotAccessMemory())
+    return MRI_NoModRef;
+  ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;
+
+  // Iterate through all the arguments to the called function. If any argument
+  // is based on GV, return the conservative result.
+  for (auto &A : CS.args()) {
+    SmallVector<Value *, 4> Objects;
+    GetUnderlyingObjects(A, Objects, DL);
+
+    // All objects must be identified.
+    if (!std::all_of(Objects.begin(), Objects.end(), [&GV](const Value *V) {
+          return isIdentifiedObject(V);
+        }))
+      return ConservativeResult;
+
+    if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end())
+      return ConservativeResult;
+  }
+
+  // We identified all objects in the argument list, and none of them were GV.
+  return MRI_NoModRef;
+}
+
 ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
                                           const MemoryLocation &Loc) {
   unsigned Known = MRI_ModRef;
@@ -777,7 +833,8 @@
   if (const Function *F = CS.getCalledFunction())
     if (NonAddressTakenGlobals.count(GV))
       if (const FunctionInfo *FI = getFunctionInfo(F))
-        Known = FI->getModRefInfoForGlobal(*GV);
+        Known = FI->getModRefInfoForGlobal(*GV) |
+                getModRefInfoForArgument(CS, GV);
 
   if (Known == MRI_NoModRef)
     return MRI_NoModRef; // No need to query other mod/ref analyses
@@ -807,6 +864,9 @@
                                          CallGraph &CG) {
   GlobalsAAResult Result(M.getDataLayout(), TLI);
 
+  // Discover which functions aren't recursive, to feed into AnalyzeGlobals.
+  Result.CollectSCCMembership(CG);
+
   // Find non-addr taken globals.
   Result.AnalyzeGlobals(M);
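
For intuition on getModRefInfoForArgument: a callee's FunctionInfo only
records accesses made through the global symbol itself, so a callee that
touches a global purely through a pointer parameter would otherwise report
MRI_NoModRef for it. A hypothetical IR example of the case the new check now
answers conservatively (@write and @caller are illustrative names, not part
of the patch):

  @g = internal global i32 0

  ; @write never names @g, so @write's FunctionInfo records nothing for @g,
  ; yet the store below does modify @g whenever @g is passed as the argument.
  define void @write(i32* nocapture %p) {
    store i32 1, i32* %p
    ret void
  }

  define i32 @caller() {
    call void @write(i32* @g)  ; @g is an underlying object of an argument,
    %v = load i32, i32* @g     ; so the call must be answered as MRI_ModRef
    ret i32 %v
  }
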
Index: test/Analysis/GlobalsModRef/memset-escape.ll
===================================================================
--- /dev/null
+++ test/Analysis/GlobalsModRef/memset-escape.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -O1 -S -enable-non-lto-gmr=true | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+@a = internal global [3 x i32] zeroinitializer, align 4
+@b = common global i32 0, align 4
+
+; The important thing we're checking for here is the reload of (some element of)
+; @a after the memset.
+
+; CHECK-LABEL: @main
+; CHECK: call void @llvm.memset.p0i8.i64{{.*}} @a
+; CHECK: store i32 3
+; CHECK: load i32, i32* getelementptr {{.*}} @a
+; CHECK: icmp eq i32
+; CHECK: br i1
+
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4
+  %c = alloca [1 x i32], align 4
+  store i32 0, i32* %retval, align 4
+  %0 = bitcast [1 x i32]* %c to i8*
+  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 4, i32 4, i1 false)
+  store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
+  store i32 0, i32* @b, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %1 = load i32, i32* @b, align 4
+  %cmp = icmp slt i32 %1, 3
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32, i32* @b, align 4
+  %idxprom = sext i32 %2 to i64
+  %arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* @a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %3 = load i32, i32* @b, align 4
+  %inc = add nsw i32 %3, 1
+  store i32 %inc, i32* @b, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %4 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
+  %cmp1 = icmp ne i32 %4, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.end
+  call void @abort() #3
+  unreachable
+
+if.end:                                           ; preds = %for.end
+  ret i32 0
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly
+
+; Function Attrs: noreturn nounwind
+declare void @abort() noreturn nounwind
Index: test/Analysis/GlobalsModRef/nocapture.ll
===================================================================
--- /dev/null
+++ test/Analysis/GlobalsModRef/nocapture.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -globals-aa -aa-eval -print-all-alias-modref-info -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+@a = internal global i32 0, align 4
+@b = internal global i32 0, align 4
+
+define void @g(i32* %p, void (i32*)* nocapture %ptr) {
+entry:
+  tail call void %ptr(i32* %p) #1
+  ret void
+}
+
+; CHECK-LABEL: Function: f
+; CHECK: MayAlias: i32* %p, i32* @a
+; CHECK: MayAlias: i32* %q, i32* @a
+define i32 @f(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
+entry:
+  tail call void @g(i32* nonnull @a, void (i32*)* %ptr)
+  %arrayidx = getelementptr inbounds i32, i32* %p, i64 0
+  %z1 = load i32, i32* %arrayidx, align 4
+  %z2 = load i32, i32* %q, align 4
+  %add = add nsw i32 %z2, %z1
+  store i32 %add, i32* %q, align 4
+  ret i32 4
+}
+
+define void @g2(i32* nocapture %p, void (i32*)* nocapture %ptr) {
+entry:
+  tail call void %ptr(i32* %p) #1
+  ret void
+}
+
+; CHECK-LABEL: Function: f2
+; CHECK: NoAlias: i32* %p, i32* @b
+; CHECK: NoAlias: i32* %q, i32* @b
+define i32 @f2(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
+entry:
+  tail call void @g2(i32* nonnull @b, void (i32*)* %ptr)
+  %arrayidx = getelementptr inbounds i32, i32* %p, i64 0
+  %z1 = load i32, i32* %arrayidx, align 4
+  %z2 = load i32, i32* %q, align 4
+  %add = add nsw i32 %z2, %z1
+  store i32 %add, i32* %q, align 4
+  ret i32 4
+}
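
Taken together, the two tests exercise both sides of the new logic. In
nocapture.ll, @g's parameter %p is not marked nocapture (and @g forwards it
to an indirect call), so the @a that @f passes in is treated as escaping and
GlobalsAA must keep MayAlias; @g2 does mark %p nocapture and shares no
recursive cycle with @f2, so @b provably never escapes and @f2's pointer
arguments refine to NoAlias. Either test can be reproduced standalone with
its RUN line, e.g.:

  opt < nocapture.ll -globals-aa -aa-eval -print-all-alias-modref-info -S 2>&1 | FileCheck nocapture.ll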