diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp
--- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp
@@ -706,8 +706,24 @@
   Test = Annotations(Case);
   AST = TestTU::withCode(Test.code()).build();
   T = makeSelectionTree(Case, AST);
-
   EXPECT_EQ("IntegerLiteral", T.commonAncestor()->kind());
+
+  // Reduced from private bug involving RETURN_IF_ERROR.
+  // Due to >>-splitting and a bug in isBeforeInTranslationUnit, the inner
+  // S<int> would claim way too many tokens.
+  Case = R"cpp(
+    #define ID(x) x
+    template <typename T> class S {};
+    ID(
+      ID(S<S<int>> x);
+      int ^y;
+    )
+  )cpp";
+  Test = Annotations(Case);
+  AST = TestTU::withCode(Test.code()).build();
+  T = makeSelectionTree(Case, AST);
+  // not TemplateSpecializationTypeLoc!
+  EXPECT_EQ("VarDecl", T.commonAncestor()->kind());
 }
 
 TEST(SelectionTest, Implicit) {
diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -542,10 +542,10 @@
   /// If these match up with a subsequent query, the result can be reused.
   FileID LQueryFID, RQueryFID;
 
-  /// True if LQueryFID was created before RQueryFID.
+  /// The relative order of FileIDs that the CommonFID *immediately* includes.
   ///
   /// This is used to compare macro expansion locations.
-  bool IsLQFIDBeforeRQFID;
+  bool LChildBeforeRChild;
 
   /// The file found in common between the two \#include traces, i.e.,
   /// the nearest common ancestor of the \#include tree.
@@ -559,12 +559,17 @@
   unsigned LCommonOffset, RCommonOffset;
 
 public:
+  InBeforeInTUCacheEntry() = default;
+  InBeforeInTUCacheEntry(FileID L, FileID R) : LQueryFID(L), RQueryFID(R) {
+    assert(L != R);
+  }
+
   /// Return true if the currently cached values match up with
   /// the specified LHS/RHS query.
   ///
   /// If not, we can't use the cache.
-  bool isCacheValid(FileID LHS, FileID RHS) const {
-    return LQueryFID == LHS && RQueryFID == RHS;
+  bool isCacheValid() const {
+    return CommonFID.isValid();
   }
 
   /// If the cache is valid, compute the result given the
@@ -581,29 +586,28 @@
     // one of the locations points at the inclusion/expansion point of the other
     // in which case its FileID will come before the other.
     if (LOffset == ROffset)
-      return IsLQFIDBeforeRQFID;
+      return LChildBeforeRChild;
 
     return LOffset < ROffset;
   }
 
   /// Set up a new query.
-  void setQueryFIDs(FileID LHS, FileID RHS, bool isLFIDBeforeRFID) {
+  /// If it matches the old query, we can keep the cached answer.
+  void setQueryFIDs(FileID LHS, FileID RHS) {
     assert(LHS != RHS);
-    LQueryFID = LHS;
-    RQueryFID = RHS;
-    IsLQFIDBeforeRQFID = isLFIDBeforeRFID;
-  }
-
-  void clear() {
-    LQueryFID = RQueryFID = FileID();
-    IsLQFIDBeforeRQFID = false;
+    if (LQueryFID != LHS || RQueryFID != RHS) {
+      LQueryFID = LHS;
+      RQueryFID = RHS;
+      CommonFID = FileID();
+    }
   }
 
   void setCommonLoc(FileID commonFID, unsigned lCommonOffset,
-                    unsigned rCommonOffset) {
+                    unsigned rCommonOffset, bool LParentBeforeRParent) {
     CommonFID = commonFID;
     LCommonOffset = lCommonOffset;
     RCommonOffset = rCommonOffset;
+    LChildBeforeRChild = LParentBeforeRParent;
   }
 };
 
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -1992,6 +1992,7 @@
   // This is a magic number for limiting the cache size.  It was experimentally
   // derived from a small Objective-C project (where the cache filled
   // out to ~250 items).  We can make it larger if necessary.
+  // FIXME: this is almost certainly full these days. Use an LRU cache?
   enum { MagicCacheSize = 300 };
   IsBeforeInTUCacheKey Key(LFID, RFID);
 
@@ -2000,7 +2001,7 @@
   // use.  When they update the value, the cache will get automatically
   // updated as well.
   if (IBTUCache.size() < MagicCacheSize)
-    return IBTUCache[Key];
+    return IBTUCache.try_emplace(Key, LFID, RFID).first->second;
 
   // Otherwise, do a lookup that will not construct a new value.
   InBeforeInTUCache::iterator I = IBTUCache.find(Key);
@@ -2008,6 +2009,7 @@
     return I->second;
 
   // Fall back to the overflow value.
+  IBTUCacheOverflow.setQueryFIDs(LFID, RFID);
   return IBTUCacheOverflow;
 }
 
@@ -2082,43 +2084,62 @@
 
   // If we are comparing a source location with multiple locations in the same
   // file, we get a big win by caching the result.
-  if (IsBeforeInTUCache.isCacheValid(LOffs.first, ROffs.first))
+  if (IsBeforeInTUCache.isCacheValid())
     return std::make_pair(
         true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second));
 
-  // Okay, we missed in the cache, start updating the cache for this query.
-  IsBeforeInTUCache.setQueryFIDs(LOffs.first, ROffs.first,
-                          /*isLFIDBeforeRFID=*/LOffs.first.ID < ROffs.first.ID);
-
+  // Okay, we missed in the cache, we'll compute the answer and populate it.
   // We need to find the common ancestor. The only way of doing this is to
   // build the complete include chain for one and then walking up the chain
   // of the other looking for a match.
-  // We use a map from FileID to Offset to store the chain. Easier than writing
-  // a custom set hash info that only depends on the first part of a pair.
-  using LocSet = llvm::SmallDenseMap<FileID, unsigned, 16>;
-  LocSet LChain;
+
+  // A location within a FileID on the path up from LOffs to the main file.
+  struct Entry {
+    unsigned Offset;
+    FileID ParentFID; // Used for breaking ties.
+  };
+  llvm::SmallDenseMap<FileID, Entry, 16> LChain;
+
+  FileID Parent;
   do {
-    LChain.insert(LOffs);
+    LChain.try_emplace(LOffs.first, Entry{LOffs.second, Parent});
     // We catch the case where LOffs is in a file included by ROffs and
     // quit early. The other way round unfortunately remains suboptimal.
-  } while (LOffs.first != ROffs.first && !MoveUpIncludeHierarchy(LOffs, *this));
-  LocSet::iterator I;
-  while((I = LChain.find(ROffs.first)) == LChain.end()) {
-    if (MoveUpIncludeHierarchy(ROffs, *this))
-      break; // Met at topmost file.
-  }
-  if (I != LChain.end())
-    LOffs = *I;
+    if (LOffs.first == ROffs.first)
+      break;
+    Parent = LOffs.first;
+  } while (!MoveUpIncludeHierarchy(LOffs, *this));
 
-  // If we exited because we found a nearest common ancestor, compare the
-  // locations within the common file and cache them.
-  if (LOffs.first == ROffs.first) {
-    IsBeforeInTUCache.setCommonLoc(LOffs.first, LOffs.second, ROffs.second);
-    return std::make_pair(
-        true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second));
-  }
-  // Clear the lookup cache, it depends on a common location.
-  IsBeforeInTUCache.clear();
+  Parent = FileID();
+  do {
+    auto I = LChain.find(ROffs.first);
+    if (I != LChain.end()) {
+      // Compare the locations within the common file and cache them.
+      LOffs.first = I->first;
+      LOffs.second = I->second.Offset;
+      // The relative order of LParent and RParent is a tiebreaker when
+      // - locs expand to the same location (occurs in macro arg expansion)
+      // - one loc is a parent of the other (we consider the parent as "first")
+      // For the parent to be first, the invalid file ID must compare smaller.
+      // However loaded FileIDs are <0, so we perform *unsigned* comparison!
+      // This changes the relative order of local vs loaded FileIDs, but it
+      // doesn't matter as these are never mixed in macro expansion.
+      unsigned LParent = I->second.ParentFID.ID;
+      unsigned RParent = Parent.ID;
+      assert((LOffs.second != ROffs.second) || (LParent == 0 || RParent == 0) ||
+             isInSameSLocAddrSpace(getComposedLoc(I->second.ParentFID, 0),
+                                   getComposedLoc(Parent, 0), nullptr) &&
+                 "Mixed local/loaded FileIDs with same include location?");
+      IsBeforeInTUCache.setCommonLoc(LOffs.first, LOffs.second, ROffs.second,
+                                     LParent < RParent);
+      return std::make_pair(
+          true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second));
+    }
+    Parent = ROffs.first;
+  } while (!MoveUpIncludeHierarchy(ROffs, *this));
+
+  // If we found no match, we're not in the same TU.
+  // We don't cache this, but it is rare.
   return std::make_pair(false, false);
 }
 
diff --git a/clang/unittests/Basic/SourceManagerTest.cpp b/clang/unittests/Basic/SourceManagerTest.cpp
--- a/clang/unittests/Basic/SourceManagerTest.cpp
+++ b/clang/unittests/Basic/SourceManagerTest.cpp
@@ -171,6 +171,83 @@
   EXPECT_TRUE(SourceMgr.isBeforeInTranslationUnit(idLoc, macroExpEndLoc));
 }
 
+TEST_F(SourceManagerTest, isBeforeInTranslationUnitWithTokenSplit) {
+  const char *main = R"cpp(
+    #define ID(X) X
+    ID(
+      ID(a >> b)
+      c
+    )
+  )cpp";
+
+  SourceMgr.setMainFileID(
+      SourceMgr.createFileID(llvm::MemoryBuffer::getMemBuffer(main)));
+
+  TrivialModuleLoader ModLoader;
+  HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
+                          Diags, LangOpts, &*Target);
+  Preprocessor PP(std::make_shared<PreprocessorOptions>(), Diags, LangOpts,
+                  SourceMgr, HeaderInfo, ModLoader,
+                  /*IILookup =*/nullptr,
+                  /*OwnsHeaderSearch =*/false);
+  PP.Initialize(*Target);
+  PP.EnterMainSourceFile();
+  llvm::SmallString<8> Scratch;
+
+  std::vector<Token> toks;
+  while (1) {
+    Token tok;
+    PP.Lex(tok);
+    if (tok.is(tok::eof))
+      break;
+    toks.push_back(tok);
+  }
+
+  // Make sure we got the tokens that we expected.
+  ASSERT_EQ(4U, toks.size()) << "a >> b c";
+  // Sanity check their order.
+  for (unsigned I = 0; I < toks.size() - 1; ++I) {
+    EXPECT_TRUE(SourceMgr.isBeforeInTranslationUnit(toks[I].getLocation(),
+                                                    toks[I + 1].getLocation()));
+    EXPECT_FALSE(SourceMgr.isBeforeInTranslationUnit(toks[I + 1].getLocation(),
+                                                     toks[I].getLocation()));
+  }
+
+  // Split the >> into two > tokens, as happens when parsing nested templates.
+  unsigned RightShiftIndex = 1;
+  SourceLocation RightShift = toks[RightShiftIndex].getLocation();
+  EXPECT_EQ(">>", Lexer::getSpelling(SourceMgr.getSpellingLoc(RightShift),
+                                     Scratch, SourceMgr, LangOpts));
+  SourceLocation Greater1 = PP.SplitToken(RightShift, /*Length=*/1);
+  SourceLocation Greater2 = RightShift.getLocWithOffset(1);
+  EXPECT_TRUE(Greater1.isMacroID());
+  EXPECT_EQ(">", Lexer::getSpelling(SourceMgr.getSpellingLoc(Greater1), Scratch,
+                                    SourceMgr, LangOpts));
+  EXPECT_EQ(">", Lexer::getSpelling(SourceMgr.getSpellingLoc(Greater2), Scratch,
+                                    SourceMgr, LangOpts));
+  EXPECT_EQ(SourceMgr.getImmediateExpansionRange(Greater1).getBegin(),
+            RightShift);
+
+  for (unsigned I = 0; I < toks.size(); ++I) {
+    SCOPED_TRACE("Token " + std::to_string(I));
+    // Right-shift is the parent of Greater1, so it compares less.
+    EXPECT_EQ(
+        SourceMgr.isBeforeInTranslationUnit(toks[I].getLocation(), Greater1),
+        I <= RightShiftIndex);
+    EXPECT_EQ(
+        SourceMgr.isBeforeInTranslationUnit(toks[I].getLocation(), Greater2),
+        I <= RightShiftIndex);
+    EXPECT_EQ(
+        SourceMgr.isBeforeInTranslationUnit(Greater1, toks[I].getLocation()),
+        RightShiftIndex < I);
+    EXPECT_EQ(
+        SourceMgr.isBeforeInTranslationUnit(Greater2, toks[I].getLocation()),
+        RightShiftIndex < I);
+  }
+  EXPECT_TRUE(SourceMgr.isBeforeInTranslationUnit(Greater1, Greater2));
+  EXPECT_FALSE(SourceMgr.isBeforeInTranslationUnit(Greater2, Greater1));
+}
+
 TEST_F(SourceManagerTest, getColumnNumber) {
   const char *Source =
     "int x;\n"