Index: include/llvm/Analysis/GlobalsModRef.h
===================================================================
--- include/llvm/Analysis/GlobalsModRef.h
+++ include/llvm/Analysis/GlobalsModRef.h
@@ -50,6 +50,9 @@
   /// For each function, keep track of what globals are modified or read.
   DenseMap<const Function *, FunctionInfo> FunctionInfos;
 
+  /// Functions we have proved to never recurse, directly or indirectly.
+  SmallPtrSet<const Function *, 16> NonRecursiveFunctions;
+
   /// Handle to clear this analysis on deletion of values.
   struct DeletionCallbackHandle final : CallbackVH {
     GlobalsAAResult &GAR;
@@ -103,8 +106,11 @@
                             SmallPtrSetImpl<Function *> *Writers = nullptr,
                             GlobalValue *OkayStoreDest = nullptr);
   bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
-
+  void CollectNonRecursiveFunctions(CallGraph &CG);
+
   bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V);
+  ModRefInfo getModRefInfoForArgument(ImmutableCallSite CS,
+                                      const GlobalValue *GV);
 };
 
 /// Analysis pass providing a never-invalidated alias analysis result.
Index: lib/Analysis/GlobalsModRef.cpp
===================================================================
--- lib/Analysis/GlobalsModRef.cpp
+++ lib/Analysis/GlobalsModRef.cpp
@@ -358,6 +358,24 @@
         if (isFreeCall(I, &TLI)) {
           if (Writers)
             Writers->insert(CS->getParent()->getParent());
+        } else if (CS.doesNotCapture(CS.getArgumentNo(&U))) {
+          Function *ParentF = CS->getParent()->getParent();
+          // A nocapture argument may be read from or written to, but does not
+          // escape unless the call can somehow recurse.
+          //
+          // nocapture "indicates that the callee does not make any copies of
+          // the pointer that outlive itself". Therefore if we directly or
+          // indirectly recurse, we must treat the pointer as escaping.
+          if (!NonRecursiveFunctions.count(ParentF))
+            // We don't know that the parent function is not recursive.
+            // FIXME: This is a larger hammer than is strictly necessary - it
+            // tells us that ParentF doesn't recurse through ANY callsite, not
+            // just through this callsite.
+            return true;
+          if (Readers)
+            Readers->insert(ParentF);
+          if (Writers)
+            Writers->insert(ParentF);
         } else {
           return true; // Argument of an unknown call.
         }
@@ -439,6 +457,32 @@
   return true;
 }
 
+void GlobalsAAResult::CollectNonRecursiveFunctions(CallGraph &CG) {
+  // We do a bottom-up SCC traversal of the call graph.  In other words, we
+  // visit all callees before callers (leaf-first).
+  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+    const std::vector<CallGraphNode *> &SCC = *I;
+    assert(!SCC.empty() && "SCC with no functions?");
+
+    if (!SCC[0]->getFunction())
+      // Calls externally. We can't tell if this will recurse or not.
+      continue;
+
+    if (SCC.size() != 1)
+      // If there are multiple nodes in an SCC, trivially there is recursion.
+      continue;
+
+    // Even with a single-entry SCC there can still be direct recursion.
+    bool Failed = false;
+    for (auto &CR : *SCC[0])
+      if (SCC[0] == CR.second)
+        Failed = true;
+
+    if (!Failed)
+      NonRecursiveFunctions.insert(SCC[0]->getFunction());
+  }
+}
+
 /// AnalyzeCallGraph - At this point, we know the functions where globals are
 /// immediately stored to and read from.  Propagate this information up the call
 /// graph to all callers and compute the mod/ref info for all memory for each
@@ -765,6 +809,21 @@
   return AAResultBase::alias(LocA, LocB);
 }
 
+ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
+                                                     const GlobalValue *GV) {
+  if (CS.doesNotAccessMemory())
+    return MRI_NoModRef;
+  ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;
+
+  // Iterate through all the arguments to the called function. If any argument
+  // is based on GV, return the conservative result.
+  for (auto &A : CS.args())
+    if (GetUnderlyingObject(A, DL) == GV)
+      return ConservativeResult;
+
+  return MRI_NoModRef;
+}
+
 ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
                                           const MemoryLocation &Loc) {
   unsigned Known = MRI_ModRef;
@@ -777,7 +836,8 @@
       if (const Function *F = CS.getCalledFunction())
         if (NonAddressTakenGlobals.count(GV))
           if (const FunctionInfo *FI = getFunctionInfo(F))
-            Known = FI->getModRefInfoForGlobal(*GV);
+            Known = FI->getModRefInfoForGlobal(*GV) |
+              getModRefInfoForArgument(CS, GV);
 
   if (Known == MRI_NoModRef)
     return MRI_NoModRef; // No need to query other mod/ref analyses
@@ -801,6 +861,9 @@
                                CallGraph &CG) {
   GlobalsAAResult Result(M.getDataLayout(), TLI);
 
+  // Discover which functions aren't recursive, to feed into AnalyzeGlobals.
+  Result.CollectNonRecursiveFunctions(CG);
+
   // Find non-addr taken globals.
   Result.AnalyzeGlobals(M);
 
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -8474,7 +8474,7 @@
   F.printAsOperand(OS, /*PrintType=*/false);
   OS << "\n";
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
-    if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
+    if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
       OS << *I << '\n';
       OS << "  -->  ";
       const SCEV *SV = SE.getSCEV(&*I);
Index: test/Analysis/GlobalsModRef/memset-escape.ll
===================================================================
--- /dev/null
+++ test/Analysis/GlobalsModRef/memset-escape.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -O1 -S -enable-non-lto-gmr=true | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+@a = internal global [3 x i32] zeroinitializer, align 4
+@b = common global i32 0, align 4
+
+; The important thing we're checking for here is the reload of (some element of)
+; @a after the memset.
+
+; CHECK-LABEL: @main
+; CHECK: call void @llvm.memset.p0i8.i64{{.*}} @a
+; CHECK: store i32 3
+; CHECK: load i32, i32* getelementptr {{.*}} @a
+; CHECK: icmp eq i32
+; CHECK: br i1
+
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4
+  %c = alloca [1 x i32], align 4
+  store i32 0, i32* %retval, align 4
+  %0 = bitcast [1 x i32]* %c to i8*
+  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 4, i32 4, i1 false)
+  store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
+  store i32 0, i32* @b, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %1 = load i32, i32* @b, align 4
+  %cmp = icmp slt i32 %1, 3
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32, i32* @b, align 4
+  %idxprom = sext i32 %2 to i64
+  %arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* @a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %3 = load i32, i32* @b, align 4
+  %inc = add nsw i32 %3, 1
+  store i32 %inc, i32* @b, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %4 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
+  %cmp1 = icmp ne i32 %4, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.end
+  call void @abort() #3
+  unreachable
+
+if.end:                                           ; preds = %for.end
+  ret i32 0
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly
+
+; Function Attrs: noreturn nounwind
+declare void @abort() noreturn nounwind
Index: test/Analysis/GlobalsModRef/nocapture.ll
===================================================================
--- /dev/null
+++ test/Analysis/GlobalsModRef/nocapture.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -globals-aa -aa-eval -print-all-alias-modref-info -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+@a = internal global i32 0, align 4
+@b = internal global i32 0, align 4
+
+define void @g(i32* %p, void (i32*)* nocapture %ptr) {
+entry:
+  tail call void %ptr(i32* %p) #1
+  ret void
+}
+
+; CHECK-LABEL: Function: f
+; CHECK: MayAlias: i32* %p, i32* @a
+; CHECK: MayAlias: i32* %q, i32* @a
+define i32 @f(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
+entry:
+  tail call void @g(i32* nonnull @a, void (i32*)* %ptr)
+  %arrayidx = getelementptr inbounds i32, i32* %p, i64 0
+  %z1 = load i32, i32* %arrayidx, align 4
+  %z2 = load i32, i32* %q, align 4
+  %add = add nsw i32 %z2, %z1
+  store i32 %add, i32* %q, align 4
+  ret i32 4
+}
+
+define void @g2(i32* nocapture %p, void (i32*)* nocapture %ptr) {
+entry:
+  tail call void %ptr(i32* %p) #1
+  ret void
+}
+
+; CHECK-LABEL: Function: f2
+; CHECK: NoAlias: i32* %p, i32* @b
+; CHECK: NoAlias: i32* %q, i32* @b
+define i32 @f2(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
+entry:
+  tail call void @g2(i32* nonnull @b, void (i32*)* %ptr)
+  %arrayidx = getelementptr inbounds i32, i32* %p, i64 0
+  %z1 = load i32, i32* %arrayidx, align 4
+  %z2 = load i32, i32* %q, align 4
+  %add = add nsw i32 %z2, %z1
+  store i32 %add, i32* %q, align 4
+  ret i32 4
+}
+
+attributes #1 = { nounwind }