diff --git a/clang-tools-extra/clangd/index/FileIndex.h b/clang-tools-extra/clangd/index/FileIndex.h --- a/clang-tools-extra/clangd/index/FileIndex.h +++ b/clang-tools-extra/clangd/index/FileIndex.h @@ -109,17 +109,15 @@ private: bool UseDex; // FIXME: this should be always on. - // Contains information from each file's preamble only. - // These are large, but update fairly infrequently (preambles are stable). + // Contains information from each file's preamble only. Symbols and relations + // are sharded per declaration file to deduplicate multiple symbols and reduce + // memory usage. // Missing information: // - symbol refs (these are always "from the main file") // - definition locations in the main file // - // FIXME: Because the preambles for different TUs have large overlap and - // FileIndex doesn't deduplicate, this uses lots of extra RAM. - // The biggest obstacle in fixing this: the obvious approach of partitioning - // by declaring file (rather than main file) fails if headers provide - // different symbols based on preprocessor state. + // FIXME: We ignore symbol resulting from different PP states while parsing a + // file and store only the last one. FileSymbols PreambleSymbols; SwapIndex PreambleIndex; diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp --- a/clang-tools-extra/clangd/index/FileIndex.cpp +++ b/clang-tools-extra/clangd/index/FileIndex.cpp @@ -10,11 +10,14 @@ #include "CollectMacros.h" #include "Logger.h" #include "ParsedAST.h" +#include "Path.h" #include "SymbolCollector.h" #include "index/CanonicalIncludes.h" #include "index/Index.h" #include "index/MemIndex.h" #include "index/Merge.h" +#include "index/Relation.h" +#include "index/Symbol.h" #include "index/SymbolOrigin.h" #include "index/dex/Dex.h" #include "clang/AST/ASTContext.h" @@ -23,19 +26,21 @@ #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include +#include namespace clang { namespace clangd { +namespace { -static SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr PP, - llvm::ArrayRef DeclsToIndex, - const MainFileMacros *MacroRefsToIndex, - const CanonicalIncludes &Includes, - bool IsIndexMainAST, llvm::StringRef Version) { +SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr PP, + llvm::ArrayRef DeclsToIndex, + const MainFileMacros *MacroRefsToIndex, + const CanonicalIncludes &Includes, bool IsIndexMainAST, + llvm::StringRef Version) { SymbolCollector::Options CollectorOpts; CollectorOpts.CollectIncludePath = true; CollectorOpts.Includes = &Includes; @@ -85,6 +90,53 @@ std::move(Relations)); } +/// Takes slabs coming from a TU(multiple files) and shards them per declaration +/// location. Afterwards updates \p FileSyms. +/// \p HintPath is used to convert file uris stored in symbols into absolute +/// paths used by \p FileSyms. +void shardSlabToFiles(const SymbolSlab &Syms, const RelationSlab &Rels, + FileSymbols &FileSyms, llvm::StringRef HintPath) { + llvm::StringMap FileToSyms; + llvm::StringMap FileToRels; + llvm::DenseMap SymbolIDToFile; + llvm::StringMap URIToPathCache; + auto FileForSymbol = [&](const Symbol &S) -> llvm::StringRef { + llvm::StringRef FileURI = S.CanonicalDeclaration.FileURI; + // Symbols might contain empty URIs when they are not coming from a file, + // e.g. command line macros. + if (FileURI.empty()) + return ""; + auto I = URIToPathCache.try_emplace(FileURI); + if (I.second) { + auto URIPath = URI::resolve(FileURI, HintPath); + assert(URIPath); + I.first->second = *URIPath; + } + return I.first->second; + }; + for (const auto &S : Syms) { + auto File = FileForSymbol(S); + FileToSyms[File].insert(S); + SymbolIDToFile[S.ID] = File; + } + for (const auto &R : Rels) { + auto File = SymbolIDToFile.lookup(R.Subject); + assert(!File.empty() && "unknown symbol in relation slab"); + FileToRels[File].insert(R); + } + for (auto &Entry : FileToSyms) { + auto File = Entry.first(); + auto &SymSlab = Entry.getValue(); + auto &RelSlab = FileToRels[File]; + FileSyms.update(File, + std::make_unique(std::move(SymSlab).build()), + std::make_unique(), + std::make_unique(std::move(RelSlab).build()), + /*CountReferences=*/false); + } +} +} // namespace + SlabTuple indexMainDecls(ParsedAST &AST) { return indexSymbols(AST.getASTContext(), AST.getPreprocessorPtr(), AST.getLocalTopLevelDecls(), &AST.getMacros(), @@ -254,15 +306,17 @@ ASTContext &AST, std::shared_ptr PP, const CanonicalIncludes &Includes) { - auto Slabs = indexHeaderSymbols(Version, AST, std::move(PP), Includes); - PreambleSymbols.update( - Path, std::make_unique(std::move(std::get<0>(Slabs))), - std::make_unique(), - std::make_unique(std::move(std::get<2>(Slabs))), - /*CountReferences=*/false); + SymbolSlab Syms; + RelationSlab Rels; + std::tie(Syms, std::ignore, Rels) = + indexHeaderSymbols(Version, AST, std::move(PP), Includes); + shardSlabToFiles(Syms, Rels, PreambleSymbols, Path); PreambleIndex.reset( PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light, DuplicateHandling::PickOne)); + vlog("Build dynamic index for header symbols with estimated memory usage of " + "{0} bytes", + PreambleIndex.estimateMemoryUsage()); } void FileIndex::updateMain(PathRef Path, ParsedAST &AST) { @@ -274,6 +328,9 @@ /*CountReferences=*/true); MainFileIndex.reset( MainFileSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge)); + vlog("Build dynamic index for mainfile symbols with estimated memory usage " + "of {0} bytes", + MainFileIndex.estimateMemoryUsage()); } } // namespace clangd diff --git a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp --- a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp @@ -151,8 +151,9 @@ File.HeaderFilename = (Basename + ".h").str(); File.HeaderCode = std::string(Code); auto AST = File.build(); - M.updatePreamble(File.Filename, /*Version=*/"null", AST.getASTContext(), - AST.getPreprocessorPtr(), AST.getCanonicalIncludes()); + M.updatePreamble(testPath(File.Filename), /*Version=*/"null", + AST.getASTContext(), AST.getPreprocessorPtr(), + AST.getCanonicalIncludes()); } TEST(FileIndexTest, CustomizedURIScheme) { @@ -393,8 +394,9 @@ TU.HeaderCode = "class A {}; class B : public A {};"; auto AST = TU.build(); FileIndex Index; - Index.updatePreamble(TU.Filename, /*Version=*/"null", AST.getASTContext(), - AST.getPreprocessorPtr(), AST.getCanonicalIncludes()); + Index.updatePreamble(testPath(TU.Filename), /*Version=*/"null", + AST.getASTContext(), AST.getPreprocessorPtr(), + AST.getCanonicalIncludes()); SymbolID A = findSymbol(TU.headerSymbols(), "A").ID; uint32_t Results = 0; RelationsRequest Req; diff --git a/clang-tools-extra/clangd/unittests/TestTU.cpp b/clang-tools-extra/clangd/unittests/TestTU.cpp --- a/clang-tools-extra/clangd/unittests/TestTU.cpp +++ b/clang-tools-extra/clangd/unittests/TestTU.cpp @@ -116,9 +116,10 @@ std::unique_ptr TestTU::index() const { auto AST = build(); auto Idx = std::make_unique(/*UseDex=*/true); - Idx->updatePreamble(Filename, /*Version=*/"null", AST.getASTContext(), - AST.getPreprocessorPtr(), AST.getCanonicalIncludes()); - Idx->updateMain(Filename, AST); + Idx->updatePreamble(testPath(Filename), /*Version=*/"null", + AST.getASTContext(), AST.getPreprocessorPtr(), + AST.getCanonicalIncludes()); + Idx->updateMain(testPath(Filename), AST); return std::move(Idx); }