diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp --- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp @@ -706,8 +706,24 @@ Test = Annotations(Case); AST = TestTU::withCode(Test.code()).build(); T = makeSelectionTree(Case, AST); - EXPECT_EQ("IntegerLiteral", T.commonAncestor()->kind()); + + // Reduced from private bug involving RETURN_IF_ERROR. + // Due to >>-splitting and a bug in isBeforeInTranslationUnit, the inner + // S would claim way too many tokens. + Case = R"cpp( + #define ID(x) x + template class S {}; + ID( + ID(S> x); + int ^y; + ) + )cpp"; + Test = Annotations(Case); + AST = TestTU::withCode(Test.code()).build(); + T = makeSelectionTree(Case, AST); + // not TemplateSpecializationTypeLoc! + EXPECT_EQ("VarDecl", T.commonAncestor()->kind()); } TEST(SelectionTest, Implicit) { diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -542,10 +542,10 @@ /// If these match up with a subsequent query, the result can be reused. FileID LQueryFID, RQueryFID; - /// True if LQueryFID was created before RQueryFID. + /// The relative order of FileIDs that the CommonFID *immediately* includes. /// /// This is used to compare macro expansion locations. - bool IsLQFIDBeforeRQFID; + bool LChildBeforeRChild; /// The file found in common between the two \#include traces, i.e., /// the nearest common ancestor of the \#include tree. @@ -559,12 +559,17 @@ unsigned LCommonOffset, RCommonOffset; public: + InBeforeInTUCacheEntry() = default; + InBeforeInTUCacheEntry(FileID L, FileID R) : LQueryFID(L), RQueryFID(R) { + assert(L != R); + } + /// Return true if the currently cached values match up with /// the specified LHS/RHS query. /// /// If not, we can't use the cache. - bool isCacheValid(FileID LHS, FileID RHS) const { - return LQueryFID == LHS && RQueryFID == RHS; + bool isCacheValid() const { + return CommonFID.isValid(); } /// If the cache is valid, compute the result given the @@ -581,29 +586,28 @@ // one of the locations points at the inclusion/expansion point of the other // in which case its FileID will come before the other. if (LOffset == ROffset) - return IsLQFIDBeforeRQFID; + return LChildBeforeRChild; return LOffset < ROffset; } /// Set up a new query. - void setQueryFIDs(FileID LHS, FileID RHS, bool isLFIDBeforeRFID) { + /// If it matches the old query, we can keep the cached answer. + void setQueryFIDs(FileID LHS, FileID RHS) { assert(LHS != RHS); - LQueryFID = LHS; - RQueryFID = RHS; - IsLQFIDBeforeRQFID = isLFIDBeforeRFID; - } - - void clear() { - LQueryFID = RQueryFID = FileID(); - IsLQFIDBeforeRQFID = false; + if (LQueryFID != LHS || RQueryFID != RHS) { + LQueryFID = LHS; + RQueryFID = RHS; + CommonFID = FileID(); + } } void setCommonLoc(FileID commonFID, unsigned lCommonOffset, - unsigned rCommonOffset) { + unsigned rCommonOffset, bool LParentBeforeRParent) { CommonFID = commonFID; LCommonOffset = lCommonOffset; RCommonOffset = rCommonOffset; + LChildBeforeRChild = LParentBeforeRParent; } }; diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -1992,6 +1992,7 @@ // This is a magic number for limiting the cache size. It was experimentally // derived from a small Objective-C project (where the cache filled // out to ~250 items). We can make it larger if necessary. + // FIXME: this is almost certainly full these days. Use an LRU cache? enum { MagicCacheSize = 300 }; IsBeforeInTUCacheKey Key(LFID, RFID); @@ -2000,7 +2001,7 @@ // use. When they update the value, the cache will get automatically // updated as well. if (IBTUCache.size() < MagicCacheSize) - return IBTUCache[Key]; + return IBTUCache.try_emplace(Key, LFID, RFID).first->second; // Otherwise, do a lookup that will not construct a new value. InBeforeInTUCache::iterator I = IBTUCache.find(Key); @@ -2008,6 +2009,7 @@ return I->second; // Fall back to the overflow value. + IBTUCacheOverflow.setQueryFIDs(LFID, RFID); return IBTUCacheOverflow; } @@ -2082,43 +2084,62 @@ // If we are comparing a source location with multiple locations in the same // file, we get a big win by caching the result. - if (IsBeforeInTUCache.isCacheValid(LOffs.first, ROffs.first)) + if (IsBeforeInTUCache.isCacheValid()) return std::make_pair( true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second)); - // Okay, we missed in the cache, start updating the cache for this query. - IsBeforeInTUCache.setQueryFIDs(LOffs.first, ROffs.first, - /*isLFIDBeforeRFID=*/LOffs.first.ID < ROffs.first.ID); - + // Okay, we missed in the cache, we'll compute the answer and populate it. // We need to find the common ancestor. The only way of doing this is to // build the complete include chain for one and then walking up the chain // of the other looking for a match. - // We use a map from FileID to Offset to store the chain. Easier than writing - // a custom set hash info that only depends on the first part of a pair. - using LocSet = llvm::SmallDenseMap; - LocSet LChain; + + // A location within a FileID on the path up from LOffs to the main file. + struct Entry { + unsigned Offset; + FileID ParentFID; // Used for breaking ties. + }; + llvm::SmallDenseMap LChain; + + FileID Parent; do { - LChain.insert(LOffs); + LChain.try_emplace(LOffs.first, Entry{LOffs.second, Parent}); // We catch the case where LOffs is in a file included by ROffs and // quit early. The other way round unfortunately remains suboptimal. - } while (LOffs.first != ROffs.first && !MoveUpIncludeHierarchy(LOffs, *this)); - LocSet::iterator I; - while((I = LChain.find(ROffs.first)) == LChain.end()) { - if (MoveUpIncludeHierarchy(ROffs, *this)) - break; // Met at topmost file. - } - if (I != LChain.end()) - LOffs = *I; + if (LOffs.first == ROffs.first) + break; + Parent = LOffs.first; + } while (!MoveUpIncludeHierarchy(LOffs, *this)); - // If we exited because we found a nearest common ancestor, compare the - // locations within the common file and cache them. - if (LOffs.first == ROffs.first) { - IsBeforeInTUCache.setCommonLoc(LOffs.first, LOffs.second, ROffs.second); - return std::make_pair( - true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second)); - } - // Clear the lookup cache, it depends on a common location. - IsBeforeInTUCache.clear(); + Parent = FileID(); + do { + auto I = LChain.find(ROffs.first); + if (I != LChain.end()) { + // Compare the locations within the common file and cache them. + LOffs.first = I->first; + LOffs.second = I->second.Offset; + // The relative order of LParent and RParent is a tiebreaker when + // - locs expand to the same location (occurs in macro arg expansion) + // - one loc is a parent of the other (we consider the parent as "first") + // For the parent to be first, the invalid file ID must compare smaller. + // However loaded FileIDs are <0, so we perform *unsigned* comparison! + // This changes the relative order of local vs loaded FileIDs, but it + // doesn't matter as these are never mixed in macro expansion. + unsigned LParent = I->second.ParentFID.ID; + unsigned RParent = Parent.ID; + assert((LOffs.second != ROffs.second) || (LParent == 0 || RParent == 0) || + isInSameSLocAddrSpace(getComposedLoc(I->second.ParentFID, 0), + getComposedLoc(Parent, 0), nullptr) && + "Mixed local/loaded FileIDs with same include location?"); + IsBeforeInTUCache.setCommonLoc(LOffs.first, LOffs.second, ROffs.second, + LParent < RParent); + return std::make_pair( + true, IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second)); + } + Parent = ROffs.first; + } while (!MoveUpIncludeHierarchy(ROffs, *this)); + + // If we found no match, we're not in the same TU. + // We don't cache this, but it is rare. return std::make_pair(false, false); } diff --git a/clang/unittests/Basic/SourceManagerTest.cpp b/clang/unittests/Basic/SourceManagerTest.cpp --- a/clang/unittests/Basic/SourceManagerTest.cpp +++ b/clang/unittests/Basic/SourceManagerTest.cpp @@ -171,6 +171,83 @@ EXPECT_TRUE(SourceMgr.isBeforeInTranslationUnit(idLoc, macroExpEndLoc)); } +TEST_F(SourceManagerTest, isBeforeInTranslationUnitWithTokenSplit) { + const char *main = R"cpp( + #define ID(X) X + ID( + ID(a >> b) + c + ) + )cpp"; + + SourceMgr.setMainFileID( + SourceMgr.createFileID(llvm::MemoryBuffer::getMemBuffer(main))); + + TrivialModuleLoader ModLoader; + HeaderSearch HeaderInfo(std::make_shared(), SourceMgr, + Diags, LangOpts, &*Target); + Preprocessor PP(std::make_shared(), Diags, LangOpts, + SourceMgr, HeaderInfo, ModLoader, + /*IILookup =*/nullptr, + /*OwnsHeaderSearch =*/false); + PP.Initialize(*Target); + PP.EnterMainSourceFile(); + llvm::SmallString<8> Scratch; + + std::vector toks; + while (1) { + Token tok; + PP.Lex(tok); + if (tok.is(tok::eof)) + break; + toks.push_back(tok); + } + + // Make sure we got the tokens that we expected. + ASSERT_EQ(4U, toks.size()) << "a >> b c"; + // Sanity check their order. + for (unsigned I = 0; I < toks.size() - 1; ++I) { + EXPECT_TRUE(SourceMgr.isBeforeInTranslationUnit(toks[I].getLocation(), + toks[I + 1].getLocation())); + EXPECT_FALSE(SourceMgr.isBeforeInTranslationUnit(toks[I + 1].getLocation(), + toks[I].getLocation())); + } + + // Split the >> into two > tokens, as happens when parsing nested templates. + unsigned RightShiftIndex = 1; + SourceLocation RightShift = toks[RightShiftIndex].getLocation(); + EXPECT_EQ(">>", Lexer::getSpelling(SourceMgr.getSpellingLoc(RightShift), + Scratch, SourceMgr, LangOpts)); + SourceLocation Greater1 = PP.SplitToken(RightShift, /*Length=*/1); + SourceLocation Greater2 = RightShift.getLocWithOffset(1); + EXPECT_TRUE(Greater1.isMacroID()); + EXPECT_EQ(">", Lexer::getSpelling(SourceMgr.getSpellingLoc(Greater1), Scratch, + SourceMgr, LangOpts)); + EXPECT_EQ(">", Lexer::getSpelling(SourceMgr.getSpellingLoc(Greater2), Scratch, + SourceMgr, LangOpts)); + EXPECT_EQ(SourceMgr.getImmediateExpansionRange(Greater1).getBegin(), + RightShift); + + for (unsigned I = 0; I < toks.size(); ++I) { + SCOPED_TRACE("Token " + std::to_string(I)); + // Right-shift is the parent of Greater1, so it compares less. + EXPECT_EQ( + SourceMgr.isBeforeInTranslationUnit(toks[I].getLocation(), Greater1), + I <= RightShiftIndex); + EXPECT_EQ( + SourceMgr.isBeforeInTranslationUnit(toks[I].getLocation(), Greater2), + I <= RightShiftIndex); + EXPECT_EQ( + SourceMgr.isBeforeInTranslationUnit(Greater1, toks[I].getLocation()), + RightShiftIndex < I); + EXPECT_EQ( + SourceMgr.isBeforeInTranslationUnit(Greater2, toks[I].getLocation()), + RightShiftIndex < I); + } + EXPECT_TRUE(SourceMgr.isBeforeInTranslationUnit(Greater1, Greater2)); + EXPECT_FALSE(SourceMgr.isBeforeInTranslationUnit(Greater2, Greater1)); +} + TEST_F(SourceManagerTest, getColumnNumber) { const char *Source = "int x;\n"