diff --git a/llvm/test/tools/llvm-diff/assumption-report-order.ll b/llvm/test/tools/llvm-diff/assumption-report-order.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-diff/assumption-report-order.ll
@@ -0,0 +1,47 @@
+; Check that differences are reported in the BB processing order
+; following the control flow, independent on whether the diff was depending
+; on an assumption or not.
+;
+; Replace %newvar1 with %newvar2 in the phi node. This can only
+; be detected to be different once BB2 has been processed, so leads to a assumption
+; and is detected to diff later on.
+; Also, replace the 1000 by 2000 in BB1, which is detected directly.
+;
+; RUN: rm -f %t.ll
+; RUN: cat %s | sed -e 's/ %newvar1, %BB2 / %newvar2, %BB2 /' | sed -e 's/1000/2000/' > %t.ll
+; RUN: not llvm-diff %s %t.ll 2>&1 | FileCheck %s
+
+; CHECK:      in function func:
+; CHECK-NEXT:   in block %BB0:
+; CHECK-NEXT:     >   %var = phi i32 [ 0, %ENTRY ], [ %newvar2, %BB2 ]
+; CHECK-NEXT:     <   %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB2 ]
+; CHECK-NEXT:   in block %BB1:
+; CHECK-NEXT:     >   %diffvar = add i32 %var, 2000
+; CHECK-NEXT:     <   %diffvar = add i32 %var, 1000
+
+define i32 @func() {
+ENTRY:
+  br label %BB0
+
+BB0:
+  ; When diffing this phi node, we need to detect whether
+  ; %newvar1 is equivalent, which is not known until BB2 has been processed.
+  %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB2 ]
+  %cnd = icmp eq i32 %var, 0
+  br i1 %cnd, label %BB1, label %END
+
+BB1:
+  %diffvar = add i32 %var, 1000
+  br label %BB1
+
+BB2:
+  %newvar1 = add i32 %var, 1
+  %newvar2 = add i32 %var, 2
+  br label %BB0
+
+END:
+  ; Equivalence of the ret depends on equivalence of %var.
+  ; Even if %var differs, we do not report a diff here, because
+  ; this is an indirect diff caused by another diff.
+  ret i32 %var
+}
diff --git a/llvm/test/tools/llvm-diff/loop.ll b/llvm/test/tools/llvm-diff/loop.ll
--- a/llvm/test/tools/llvm-diff/loop.ll
+++ b/llvm/test/tools/llvm-diff/loop.ll
@@ -1,6 +1,5 @@
-; Diff file with itself
-; Due to a current limitation in llvm-diff, a diff is reported here.
-; RUN: not llvm-diff %s %s 2>&1 | FileCheck --check-prefix=SAME-FILE %s
+; Diff file with itself, assert no difference by return code
+; RUN: llvm-diff %s %s
 
 ; Replace %newvar1 with %newvar2 in the phi node. This can only
 ; be detected to be different once BB1 has been processed.
@@ -8,23 +7,10 @@
 ; RUN: cat %s | sed -e 's/ %newvar1, %BB1 / %newvar2, %BB1 /' > %t.ll
 ; RUN: not llvm-diff %s %t.ll 2>&1 | FileCheck --check-prefix DIFFERENT-VAR %s
 
-; SAME-FILE:      in function func:
-; SAME-FILE-NEXT:   in block %BB0:
-; SAME-FILE-NEXT:     >   %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB1 ]
-; SAME-FILE-NEXT:     >   %cnd = icmp eq i32 %var, 0
-; SAME-FILE-NEXT:     >   br i1 %cnd, label %BB1, label %END
-; SAME-FILE-NEXT:     <   %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB1 ]
-; SAME-FILE-NEXT:     <   %cnd = icmp eq i32 %var, 0
-; SAME-FILE-NEXT:     <   br i1 %cnd, label %BB1, label %END
-
 ; DIFFERENT-VAR:      in function func:
 ; DIFFERENT-VAR-NEXT:   in block %BB0:
 ; DIFFERENT-VAR-NEXT:     >   %var = phi i32 [ 0, %ENTRY ], [ %newvar2, %BB1 ]
-; DIFFERENT-VAR-NEXT:     >   %cnd = icmp eq i32 %var, 0
-; DIFFERENT-VAR-NEXT:     >   br i1 %cnd, label %BB1, label %END
 ; DIFFERENT-VAR-NEXT:     <   %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB1 ]
-; DIFFERENT-VAR-NEXT:     <   %cnd = icmp eq i32 %var, 0
-; DIFFERENT-VAR-NEXT:     <   br i1 %cnd, label %BB1, label %END
 define i32 @func() {
 ENTRY:
   br label %BB0
diff --git a/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp b/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp
--- a/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp
+++ b/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
@@ -119,14 +120,99 @@
   const Value *SavedLHS;
   const Value *SavedRHS;
 
-  /// The current mapping from old local values to new local values.
+  // The current mapping from old local values to new local values.
   DenseMap<const Value *, const Value *> Values;
 
-  /// The current mapping from old blocks to new blocks.
+  // The current mapping from old blocks to new blocks.
   DenseMap<const BasicBlock *, const BasicBlock *> Blocks;
 
+  // The tentative mapping from old local values while comparing a pair of
+  // basic blocks. Once the pair has been processed, the tentative mapping is
+  // committed to the Values map.
   DenseSet<std::pair<const Value *, const Value *>> TentativeValues;
 
+  // Equivalence Assumptions
+  //
+  // For basic blocks in loops, some values in phi nodes may depend on
+  // values from not yet processed basic blocks in the loop. When encountering
+  // such values, we optimistically asssume their equivalence and store this
+  // assumption in a BlockDiffCandidate for the pair of compared BBs.
+  //
+  // Once we have diffed all BBs, for every BlockDiffCandidate, we check all
+  // stored assumptions using the Values map that stores proven equivalences
+  // between the old and new values, and report a diff if an assumption cannot
+  // be proven to be true.
+  //
+  // Note that after having made an assumption, all further determined
+  // equivalences implicitly depend on that assumption. These will not be
+  // reverted or reported if the assumption proves to be false, because these
+  // are considered indirect diffs caused by earlier direct diffs.
+  //
+  // We aim to avoid false negatives in llvm-diff, that is, ensure that
+  // whenever no diff is reported, the functions are indeed equal. If
+  // assumptions were made, this is not entirely clear, because in principle we
+  // could end up with a circular proof where the proof of equivalence of two
+  // nodes is depending on the assumption of their equivalence.
+  //
+  // To see that assumptions do not add false negatives, note that if we do not
+  // report a diff, this means that there is an equivalence mapping between old
+  // and new values that is consistent with all assumptions made. The circular
+  // dependency that exists on an IR value level does not exist at run time,
+  // because the values selected by the phi nodes must always already have been
+  // computed. Hence, we can prove equivalence of the old and new functions by
+  // considering step-wise parallel execution, and incrementally proving
+  // equivalence of every new computed value. Another way to think about it is
+  // to imagine cloning the loop BBs for every iteration, turning the loops
+  // into (possibly infinite) DAGs, and proving equivalence by induction on the
+  // iteration, using the computed value mapping.
+
+  // The class BlockDiffCandidate stores pairs which either have already been
+  // proven to differ, or pairs whose equivalence depends on assumptions to be
+  // verified later.
+  struct BlockDiffCandidate {
+    const BasicBlock *LBB;
+    const BasicBlock *RBB;
+    // Maps old values to assumed-to-be-equivalent new values
+    SmallDenseMap<const Value *, const Value *> EquivalenceAssumptions;
+    // If set, we already know the blocks differ.
+    bool KnownToDiffer;
+  };
+
+  // List of block diff candidates in the order found by processing.
+  // We generate reports in this order.
+  // For every LBB, there may only be one corresponding RBB.
+  SmallVector<BlockDiffCandidate> BlockDiffCandidates;
+  // Maps LBB to the index of its BlockDiffCandidate, if existing.
+  DenseMap<const BasicBlock *, uint64_t> BlockDiffCandidateIndices;
+
+  // Note: Every LBB must always be queried together with the same RBB.
+  // The returned reference is not permanently valid and should not be stored.
+  BlockDiffCandidate &getOrCreateBlockDiffCandidate(const BasicBlock *LBB,
+                                                    const BasicBlock *RBB) {
+    auto It = BlockDiffCandidateIndices.find(LBB);
+    // Check if LBB already has a diff candidate
+    if (It == BlockDiffCandidateIndices.end()) {
+      // Add new one
+      BlockDiffCandidateIndices[LBB] = BlockDiffCandidates.size();
+      BlockDiffCandidates.push_back(
+          {LBB, RBB, SmallDenseMap<const Value *, const Value *>(), false});
+      return BlockDiffCandidates.back();
+    }
+    // Use existing one
+    BlockDiffCandidate &Result = BlockDiffCandidates[It->second];
+    assert(Result.RBB == RBB && "Inconsistent basic block pairing!");
+    return Result;
+  }
+
+  // Optionally passed to equivalence checker functions, so these can add
+  // assumptions in BlockDiffCandidates. Its presence controls whether
+  // assumptions are generated.
+  struct AssumptionContext {
+    // The two basic blocks that need the two compared values to be equivalent.
+    const BasicBlock *LBB;
+    const BasicBlock *RBB;
+  };
+
   unsigned getUnprocPredCount(const BasicBlock *Block) const {
     unsigned Count = 0;
     for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E;
@@ -180,7 +266,7 @@
   void unify(const Instruction *L, const Instruction *R) {
     DifferenceEngine::Context C(Engine, L, R);
 
-    bool Result = diff(L, R, true, true);
+    bool Result = diff(L, R, true, true, true);
     assert(!Result && "structural differences second time around?");
     (void) Result;
     if (!L->use_empty())
@@ -194,6 +280,26 @@
     }
   }
 
+  void checkAndReportDiffCandidates() {
+    for (BlockDiffCandidate &BDC : BlockDiffCandidates) {
+
+      // Check assumptions
+      for (const auto &[L, R] : BDC.EquivalenceAssumptions) {
+        auto It = Values.find(L);
+        if (It == Values.end() || It->second != R) {
+          BDC.KnownToDiffer = true;
+          break;
+        }
+      }
+
+      // Run block diff if the BBs differ
+      if (BDC.KnownToDiffer) {
+        DifferenceEngine::Context C(Engine, BDC.LBB, BDC.RBB);
+        runBlockDiff(BDC.LBB->begin(), BDC.RBB->begin());
+      }
+    }
+  }
+
   void diff(const BasicBlock *L, const BasicBlock *R) {
     DifferenceEngine::Context C(Engine, L, R);
 
@@ -206,9 +312,14 @@
 
       // If the instructions differ, start the more sophisticated diff
       // algorithm at the start of the block.
-      if (diff(LeftI, RightI, false, false)) {
+      if (diff(LeftI, RightI, false, false, true)) {
         TentativeValues.clear();
-        return runBlockDiff(L->begin(), R->begin());
+        // Register (L, R) as diffing pair. Note that we could directly emit a
+        // block diff here, but this way we ensure all diffs are emitted in one
+        // consistent order, independent of whether the diffs were detected
+        // immediately or via invalid assumptions.
+        getOrCreateBlockDiffCandidate(L, R).KnownToDiffer = true;
+        return;
       }
 
       // Otherwise, tentatively unify them.
@@ -232,7 +343,9 @@
 
   bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) {
     // FIXME: call attributes
-    if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand())) {
+    AssumptionContext AC = {L.getParent(), R.getParent()};
+    if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand(),
+                              &AC)) {
       if (Complain) Engine.log("called functions differ");
       return true;
     }
@@ -241,7 +354,7 @@
       return true;
     }
     for (unsigned I = 0, E = L.arg_size(); I != E; ++I)
-      if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I))) {
+      if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I), &AC)) {
         if (Complain)
           Engine.logf("arguments %l and %r differ")
               << L.getArgOperand(I) << R.getArgOperand(I);
@@ -250,9 +363,15 @@
     return false;
   }
 
+  // If AllowAssumptions is enabled, whenever we encounter a pair of values
+  // that we cannot prove to be equivalent, we assume equivalence and store that
+  // assumption to be checked later in BlockDiffCandidates.
   bool diff(const Instruction *L, const Instruction *R, bool Complain,
-            bool TryUnify) {
+            bool TryUnify, bool AllowAssumptions) {
     // FIXME: metadata (if Complain is set)
+    AssumptionContext ACValue = {L->getParent(), R->getParent()};
+    // nullptr AssumptionContext disables assumption generation.
+    const AssumptionContext *AC = AllowAssumptions ? &ACValue : nullptr;
 
     // Different opcodes always imply different operations.
     if (L->getOpcode() != R->getOpcode()) {
@@ -291,7 +410,7 @@
           tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I));
 
         if (!equivalentAsOperands(LI.getIncomingValue(I),
-                                  RI.getIncomingValue(I))) {
+                                  RI.getIncomingValue(I), AC)) {
           if (Complain)
             Engine.log("PHI node incoming values differ");
           return true;
@@ -343,7 +462,7 @@
       }
 
       if (LI->isConditional()) {
-        if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
+        if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) {
           if (Complain) Engine.log("branch conditions differ");
           return true;
         }
@@ -360,7 +479,7 @@
         return true;
       }
 
-      if (!equivalentAsOperands(LI->getAddress(), RI->getAddress())) {
+      if (!equivalentAsOperands(LI->getAddress(), RI->getAddress(), AC)) {
         if (Complain) Engine.log("indirectbr addresses differ");
         return true;
       }
@@ -375,7 +494,7 @@
     } else if (isa<SwitchInst>(L)) {
       const SwitchInst *LI = cast<SwitchInst>(L);
       const SwitchInst *RI = cast<SwitchInst>(R);
-      if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
+      if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) {
         if (Complain) Engine.log("switch conditions differ");
         return true;
       }
@@ -421,7 +540,7 @@
 
     for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
       Value *LO = L->getOperand(I), *RO = R->getOperand(I);
-      if (!equivalentAsOperands(LO, RO)) {
+      if (!equivalentAsOperands(LO, RO, AC)) {
         if (Complain) Engine.logf("operands %l and %r differ") << LO << RO;
         return true;
       }
@@ -431,7 +550,8 @@
   }
 
 public:
-  bool equivalentAsOperands(const Constant *L, const Constant *R) {
+  bool equivalentAsOperands(const Constant *L, const Constant *R,
+                            const AssumptionContext *AC) {
     // Use equality as a preliminary filter.
     if (L == R)
       return true;
@@ -446,8 +566,8 @@
 
     // Compare constant expressions structurally.
     if (isa<ConstantExpr>(L))
-      return equivalentAsOperands(cast<ConstantExpr>(L),
-                                  cast<ConstantExpr>(R));
+      return equivalentAsOperands(cast<ConstantExpr>(L), cast<ConstantExpr>(R),
+                                  AC);
 
     // Constants of the "same type" don't always actually have the same
     // type; I don't know why.  Just white-list them.
@@ -467,7 +587,7 @@
       if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements())
         return false;
       for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) {
-        if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i)))
+        if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i), AC))
           return false;
       }
       return true;
@@ -484,7 +604,7 @@
 
       for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) {
         if (!equivalentAsOperands(CAL->getAggregateElement(I),
-                                  CAR->getAggregateElement(I)))
+                                  CAR->getAggregateElement(I), AC))
           return false;
       }
 
@@ -520,7 +640,7 @@
           continue;
         }
 
-        if (!equivalentAsOperands(LAgg, RAgg)) {
+        if (!equivalentAsOperands(LAgg, RAgg, AC)) {
           return false;
         }
       }
@@ -531,7 +651,8 @@
     return false;
   }
 
-  bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R) {
+  bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R,
+                            const AssumptionContext *AC) {
     if (L == R)
       return true;
 
@@ -570,14 +691,25 @@
         continue;
       }
 
-      if (!equivalentAsOperands(LOp, ROp))
+      if (!equivalentAsOperands(LOp, ROp, AC))
         return false;
     }
 
     return true;
   }
 
-  bool equivalentAsOperands(const Value *L, const Value *R) {
+  // There are cases where we cannot determine whether two values are
+  // equivalent, because it depends on not yet processed basic blocks -- see the
+  // documentation on assumptions.
+  //
+  // AC is the context in which we are currently performing a diff.
+  // When we encounter a pair of values for which we can neither prove
+  // equivalence nor the opposite, we do the following:
+  //  * If AC is nullptr, we treat the pair as non-equivalent.
+  //  * If AC is set, we add an assumption for the basic blocks given by AC,
+  //    and treat the pair as equivalent. The assumption is checked later.
+  bool equivalentAsOperands(const Value *L, const Value *R,
+                            const AssumptionContext *AC) {
     // Fall out if the values have different kind.
     // This possibly shouldn't take priority over oracles.
     if (L->getValueID() != R->getValueID())
@@ -587,10 +719,38 @@
     //                  InlineAsm, MDNode, MDString, PseudoSourceValue
 
     if (isa<Constant>(L))
-      return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R));
+      return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R), AC);
 
-    if (isa<Instruction>(L))
-      return Values[L] == R || TentativeValues.count(std::make_pair(L, R));
+    if (isa<Instruction>(L)) {
+      auto It = Values.find(L);
+      if (It != Values.end())
+        return It->second == R;
+
+      if (TentativeValues.count(std::make_pair(L, R)))
+        return true;
+
+      // L and R might be equivalent, this could depend on not yet processed
+      // basic blocks, so we cannot decide here.
+      if (AC) {
+        // Add an assumption, unless there is a conflict with an existing one
+        BlockDiffCandidate &BDC =
+            getOrCreateBlockDiffCandidate(AC->LBB, AC->RBB);
+        auto InsertionResult = BDC.EquivalenceAssumptions.insert({L, R});
+        if (!InsertionResult.second && InsertionResult.first->second != R) {
+          // We already have a conflicting equivalence assumption for L, so at
+          // least one must be wrong, and we know that there is a diff.
+          BDC.KnownToDiffer = true;
+          BDC.EquivalenceAssumptions.clear();
+          return false;
+        }
+        // Optimistically assume equivalence, and check later once all BBs
+        // have been processed.
+        return true;
+      }
+
+      // Assumptions disabled, so pessimistically assume non-equivalence.
+      return false;
+    }
 
     if (isa<Argument>(L))
       return Values[L] == R;
@@ -613,6 +773,8 @@
         Queue(QueueSorter(*this_())) {}
 
   void diff(const Function *L, const Function *R) {
+    assert(Values.empty() && "Multiple diffs per engine are not supported!");
+
     if (L->arg_size() != R->arg_size())
       Engine.log("different argument counts");
 
@@ -624,6 +786,7 @@
 
     tryUnify(&*L->begin(), &*R->begin());
     processQueue();
+    checkAndReportDiffCandidates();
   }
 };
 
@@ -636,7 +799,7 @@
 
 bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L,
                                                  const Instruction *R) {
-  return !diff(L, R, false, false);
+  return !diff(L, R, false, false, false);
 }
 
 void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart,
@@ -764,7 +927,7 @@
     const CallInst *LCall = cast<CallInst>(&*I);
     const InvokeInst *RInvoke = cast<InvokeInst>(RTerm);
     if (!equivalentAsOperands(LCall->getCalledOperand(),
-                              RInvoke->getCalledOperand()))
+                              RInvoke->getCalledOperand(), nullptr))
       return;
     if (!LCall->use_empty())
       Values[LCall] = RInvoke;
@@ -778,7 +941,7 @@
     const CallInst *RCall = cast<CallInst>(I);
     const InvokeInst *LInvoke = cast<InvokeInst>(LTerm);
     if (!equivalentAsOperands(LInvoke->getCalledOperand(),
-                              RCall->getCalledOperand()))
+                              RCall->getCalledOperand(), nullptr))
       return;
     if (!LInvoke->use_empty())
       Values[LInvoke] = RCall;
@@ -867,7 +1030,8 @@
     if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() &&
         GVR->hasLocalLinkage() && GVR->hasUniqueInitializer())
       return FunctionDifferenceEngine(*this, GVL, GVR)
-          .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer());
+          .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer(),
+                                nullptr);
   }
 
   return L->getName() == R->getName();