Index: clangd/ClangdServer.cpp =================================================================== --- clangd/ClangdServer.cpp +++ clangd/ClangdServer.cpp @@ -83,8 +83,9 @@ : CDB(CDB), DiagConsumer(DiagConsumer), FSProvider(FSProvider), ResourceDir(Opts.ResourceDir ? Opts.ResourceDir->str() : getStandardResourceDir()), - FileIdx(Opts.BuildDynamicSymbolIndex ? new FileIndex(Opts.URISchemes) - : nullptr), + FileIdx(Opts.BuildDynamicSymbolIndex + ? new FileIndex(Opts.URISchemes, Opts.StaticIndex) + : nullptr), PCHs(std::make_shared()), // Pass a callback into `WorkScheduler` to extract symbols from a newly // parsed file and rebuild the file index synchronously each time an AST Index: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp =================================================================== --- clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp +++ clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp @@ -31,6 +31,8 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/YAMLTraits.h" +#include +#include using namespace llvm; using namespace clang::tooling; @@ -40,6 +42,9 @@ namespace clangd { namespace { +// Prefix prepended to result keys that carry file digests (not symbols). +const llvm::StringRef kFileDigestKeyPrefix = "digest:"; + static llvm::cl::opt AssumedHeaderDir( "assume-header-dir", llvm::cl::desc("The index includes header that a symbol is defined in. 
" @@ -92,9 +97,11 @@ } auto Symbols = Collector->takeSymbols(); - for (const auto &Sym : Symbols) { + for (const auto &Sym : Symbols) Ctx->reportResult(Sym.ID.str(), SymbolToYAML(Sym)); - } + for (const auto &Digest : Symbols.fileDigests()) + Ctx->reportResult((kFileDigestKeyPrefix + Digest.first).str(), + std::to_string(Digest.second)); } private: @@ -130,15 +137,22 @@ llvm::BumpPtrAllocator Arena; Symbol::Details Scratch; Results->forEachResult([&](llvm::StringRef Key, llvm::StringRef Value) { - Arena.Reset(); - llvm::yaml::Input Yin(Value, &Arena); - auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena); - clang::clangd::SymbolID ID; - Key >> ID; - if (const auto *Existing = UniqueSymbols.find(ID)) - UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch)); - else - UniqueSymbols.insert(Sym); + if (Key.startswith(kFileDigestKeyPrefix)) { + size_t Digest; + std::stringstream(Value) >> Digest; + UniqueSymbols.addFileDigest(Key.substr(kFileDigestKeyPrefix.size()), + Digest); + } else { + Arena.Reset(); + llvm::yaml::Input Yin(Value, &Arena); + auto Sym = clang::clangd::SymbolFromYAML(Yin, Arena); + clang::clangd::SymbolID ID; + Key >> ID; + if (const auto *Existing = UniqueSymbols.find(ID)) + UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch)); + else + UniqueSymbols.insert(Sym); + } }); return std::move(UniqueSymbols).build(); } Index: clangd/index/FileIndex.h =================================================================== --- clangd/index/FileIndex.h +++ clangd/index/FileIndex.h @@ -20,6 +20,7 @@ #include "Index.h" #include "MemIndex.h" #include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/DenseMap.h" namespace clang { namespace clangd { @@ -44,7 +45,7 @@ void update(PathRef Path, std::unique_ptr Slab); // The shared_ptr keeps the symbols alive - std::shared_ptr> allSymbols(); + std::shared_ptr allSymbols(); private: mutable std::mutex Mutex; @@ -56,9 +57,13 @@ /// \brief This manages symbls from files and an in-memory index on all symbols. 
class FileIndex : public SymbolIndex { public: - /// If URISchemes is empty, the default schemes in SymbolCollector will be + /// If \p URISchemes is empty, the default schemes in SymbolCollector will be /// used. - FileIndex(std::vector URISchemes = {}); + /// \p OverlayedIndex is an optional underlying index that this file index is + /// overlaid on. If it is provided, symbols declared in files whose digests + /// match the digests recorded in \p OverlayedIndex are ignored. + FileIndex(std::vector URISchemes = {}, + const SymbolIndex *OverlayedIndex = nullptr); /// \brief Update symbols in \p Path with symbols in \p AST. If \p AST is /// nullptr, this removes all symbols in the file. @@ -73,10 +78,16 @@ void lookup(const LookupRequest &Req, llvm::function_ref Callback) const override; + void + fileDigests(const FileDigestRequest &Req, + llvm::function_ref + Callback) const override; + private: FileSymbols FSymbols; MemIndex Index; std::vector URISchemes; + const SymbolIndex *OverlayedIndex; }; /// Retrieves namespace and class level symbols in \p AST. 
Index: clangd/index/FileIndex.cpp =================================================================== --- clangd/index/FileIndex.cpp +++ clangd/index/FileIndex.cpp @@ -8,9 +8,14 @@ //===----------------------------------------------------------------------===// #include "FileIndex.h" +#include "Index.h" +#include "MemIndex.h" #include "SymbolCollector.h" #include "clang/Index/IndexingAction.h" #include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" namespace clang { namespace clangd { @@ -45,8 +50,9 @@ return Collector.takeSymbols(); } -FileIndex::FileIndex(std::vector URISchemes) - : URISchemes(std::move(URISchemes)) {} +FileIndex::FileIndex(std::vector URISchemes, + const SymbolIndex *OverlayedIndex) + : URISchemes(std::move(URISchemes)), OverlayedIndex(OverlayedIndex) {} void FileSymbols::update(PathRef Path, std::unique_ptr Slab) { std::lock_guard Lock(Mutex); @@ -56,13 +62,14 @@ FileToSlabs[Path] = std::move(Slab); } -std::shared_ptr> FileSymbols::allSymbols() { +std::shared_ptr FileSymbols::allSymbols() { // The snapshot manages life time of symbol slabs and provides pointers of all // symbols in all slabs. struct Snapshot { - std::vector Pointers; + MemIndex::SymbolSlabView View; std::vector> KeepAlive; }; + auto Snap = std::make_shared(); { std::lock_guard Lock(Mutex); @@ -70,13 +77,39 @@ for (const auto &FileAndSlab : FileToSlabs) { Snap->KeepAlive.push_back(FileAndSlab.second); for (const auto &Iter : *FileAndSlab.second) - Snap->Pointers.push_back(&Iter); + Snap->View.Pointers.push_back(&Iter); + const auto &SlabDigests = FileAndSlab.second->fileDigests(); + Snap->View.Digests.insert(SlabDigests.begin(), SlabDigests.end()); } } - auto *Pointers = &Snap->Pointers; + auto *View = &Snap->View; // Use aliasing constructor to keep the snapshot alive along with the // pointers. 
- return {std::move(Snap), Pointers}; + return {std::move(Snap), View}; +} + +static SymbolSlab dropSymbolsFromIndexedFiles(const SymbolIndex &Index, + SymbolSlab Slab) { + const auto &SlabDigests = Slab.fileDigests(); + FileDigestRequest Req; + for (const auto &Digest : SlabDigests) + Req.URIs.insert(Digest.first); + + llvm::StringSet<> IndexedURIs; + Index.fileDigests(Req, [&](StringRef URI, FileDigest Digest) { + auto I = SlabDigests.find(URI); + if ((I != SlabDigests.end()) && (I->second == Digest)) + IndexedURIs.insert(URI); + }); + if (IndexedURIs.empty()) + return Slab; + SymbolSlab::Builder Syms; + for (const auto &Sym : Slab) + if (IndexedURIs.find(Sym.CanonicalDeclaration.FileURI) == IndexedURIs.end()) + Syms.insert(Sym); + for (const auto &Digest : SlabDigests) + Syms.addFileDigest(Digest.first, Digest.second); + return std::move(Syms).build(); } void FileIndex::update(PathRef Path, ASTContext *AST, @@ -86,7 +119,10 @@ } else { assert(PP); auto Slab = llvm::make_unique(); - *Slab = indexAST(*AST, PP, URISchemes); + auto ASTSlab = indexAST(*AST, PP, URISchemes); + *Slab = OverlayedIndex ? 
dropSymbolsFromIndexedFiles(*OverlayedIndex, + std::move(ASTSlab)) + : std::move(ASTSlab); FSymbols.update(Path, std::move(Slab)); } auto Symbols = FSymbols.allSymbols(); @@ -105,5 +141,12 @@ Index.lookup(Req, Callback); } +void FileIndex::fileDigests( + const FileDigestRequest &Req, + llvm::function_ref Callback) + const { + Index.fileDigests(Req, Callback); +} + } // namespace clangd } // namespace clang Index: clangd/index/Index.h =================================================================== --- clangd/index/Index.h +++ clangd/index/Index.h @@ -12,12 +12,17 @@ #include "clang/Index/IndexSymbol.h" #include "clang/Lex/Lexer.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include +#include #include namespace clang { @@ -233,12 +238,15 @@ // and signals -> score, so it can be reused for Sema completions. double quality(const Symbol &S); +using FileDigest = uint64_t; + // An immutable symbol container that stores a set of symbols. // The container will maintain the lifetime of the symbols. class SymbolSlab { public: using const_iterator = std::vector::const_iterator; using iterator = const_iterator; + using FileDigests = llvm::DenseMap; SymbolSlab() = default; @@ -246,6 +254,9 @@ const_iterator end() const { return Symbols.end(); } const_iterator find(const SymbolID &SymID) const; + // Note that the string reference keys do not own the underlying string data. + const FileDigests &fileDigests() const { return Digests; } + size_t size() const { return Symbols.size(); } // Estimates the total memory usage. size_t bytes() const { @@ -267,6 +278,9 @@ return I == SymbolIndex.end() ? nullptr : &Symbols[I->second]; } + // Adds the digest for the header file with \p URI. 
+ void addFileDigest(llvm::StringRef URI, size_t Digest); + // Consumes the builder to finalize the slab. SymbolSlab build() &&; @@ -277,14 +291,19 @@ std::vector Symbols; // Values are indices into Symbols vector. llvm::DenseMap SymbolIndex; + FileDigests Digests; }; private: - SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector Symbols) - : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} + SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector Symbols, + FileDigests Digests) + : Arena(std::move(Arena)), Symbols(std::move(Symbols)), + Digests(std::move(Digests)) {} llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not. std::vector Symbols; // Sorted by SymbolID to allow lookup. + // Digests of all header files in TU. + FileDigests Digests; }; struct FuzzyFindRequest { @@ -312,6 +331,10 @@ llvm::DenseSet IDs; }; +struct FileDigestRequest { + llvm::StringSet<> URIs; +}; + /// \brief Interface for symbol indexes that can be used for searching or /// matching symbols among a set of symbols based on names or unique IDs. class SymbolIndex { @@ -334,6 +357,13 @@ lookup(const LookupRequest &Req, llvm::function_ref Callback) const = 0; + /// Gets digests of the files whose URIs are listed in \p Req. If a file is + /// not indexed, no digest will be returned for it. + virtual void + fileDigests(const FileDigestRequest &Req, + llvm::function_ref) + const = 0; + // FIXME: add interfaces for more index use cases: // - getAllOccurrences(SymbolID); }; Index: clangd/index/Index.cpp =================================================================== --- clangd/index/Index.cpp +++ clangd/index/Index.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Index.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" @@ -76,17 +77,20 @@ return Symbols.end(); } +// Returns a reference to the same string as \p V owned by the arena. 
+static StringRef InternStr(DenseSet &Strings, + BumpPtrAllocator &Arena, StringRef &V) { + auto R = Strings.insert(V); + if (R.second) { // New entry added to the table, copy the string. + *R.first = V.copy(Arena); + } + return *R.first; +} + // Copy the underlying data of the symbol into the owned arena. static void own(Symbol &S, DenseSet &Strings, BumpPtrAllocator &Arena) { - // Intern replaces V with a reference to the same string owned by the arena. - auto Intern = [&](StringRef &V) { - auto R = Strings.insert(V); - if (R.second) { // New entry added to the table, copy the string. - *R.first = V.copy(Arena); - } - V = *R.first; - }; + auto Intern = [&](StringRef &V) { V = InternStr(Strings, Arena, V); }; // We need to copy every StringRef field onto the arena. Intern(S.Name); @@ -121,6 +125,10 @@ } } +void SymbolSlab::Builder::addFileDigest(llvm::StringRef URI, size_t Digest) { + Digests[InternStr(Strings, Arena, URI)] = Digest; +} + SymbolSlab SymbolSlab::Builder::build() && { Symbols = {Symbols.begin(), Symbols.end()}; // Force shrink-to-fit. // Sort symbols so the slab can binary search over them. @@ -131,7 +139,10 @@ DenseSet Strings; for (auto &S : Symbols) own(S, Strings, NewArena); - return SymbolSlab(std::move(NewArena), std::move(Symbols)); + for (auto &D : Digests) + D.getFirst() = InternStr(Strings, NewArena, D.getFirst()); + return SymbolSlab(std::move(NewArena), std::move(Symbols), + std::move(Digests)); } } // namespace clangd Index: clangd/index/MemIndex.h =================================================================== --- clangd/index/MemIndex.h +++ clangd/index/MemIndex.h @@ -20,9 +20,14 @@ /// can be easily managed in memory. class MemIndex : public SymbolIndex { public: - /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain - /// accessible as long as `Symbols` is kept alive. - void build(std::shared_ptr> Symbols); + /// A view of a set of symbols. This does not own the underlying data of symbols. 
+ struct SymbolSlabView { + std::vector Pointers; + SymbolSlab::FileDigests Digests; + }; + /// \brief (Re-)Build index for `Symbols`. All symbol pointers and references + /// must remain accessible as long as `Symbols` is kept alive. + void build(std::shared_ptr Symbols); /// \brief Build index from a symbol slab. static std::unique_ptr build(SymbolSlab Slab); @@ -31,12 +36,16 @@ fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref Callback) const override; - virtual void - lookup(const LookupRequest &Req, - llvm::function_ref Callback) const override; + void lookup(const LookupRequest &Req, + llvm::function_ref Callback) const override; + + void + fileDigests(const FileDigestRequest &Req, + llvm::function_ref + Callback) const override; private: - std::shared_ptr> Symbols; + std::shared_ptr Symbols; // Index is a set of symbols that are deduplicated by symbol IDs. // FIXME: build smarter index structure. llvm::DenseMap Index; Index: clangd/index/MemIndex.cpp =================================================================== --- clangd/index/MemIndex.cpp +++ clangd/index/MemIndex.cpp @@ -15,9 +15,9 @@ namespace clang { namespace clangd { -void MemIndex::build(std::shared_ptr> Syms) { +void MemIndex::build(std::shared_ptr Syms) { llvm::DenseMap TempIndex; - for (const Symbol *Sym : *Syms) + for (const Symbol *Sym : Syms->Pointers) TempIndex[Sym->ID] = Sym; // Swap out the old symbols and index. 
@@ -64,6 +64,7 @@ void MemIndex::lookup(const LookupRequest &Req, llvm::function_ref Callback) const { + std::lock_guard Lock(Mutex); for (const auto &ID : Req.IDs) { auto I = Index.find(ID); if (I != Index.end()) @@ -71,17 +72,31 @@ } } +void MemIndex::fileDigests( + const FileDigestRequest &Req, + llvm::function_ref Callback) + const { + std::lock_guard Lock(Mutex); + for (auto &U : Req.URIs) { + auto I = Symbols->Digests.find(StringRef(U.getKey())); + if (I != Symbols->Digests.end()) + Callback(I->first, I->second); + } +} + std::unique_ptr MemIndex::build(SymbolSlab Slab) { struct Snapshot { SymbolSlab Slab; - std::vector Pointers; + SymbolSlabView View; }; auto Snap = std::make_shared(); Snap->Slab = std::move(Slab); for (auto &Sym : Snap->Slab) - Snap->Pointers.push_back(&Sym); - auto S = std::shared_ptr>(std::move(Snap), - &Snap->Pointers); + Snap->View.Pointers.push_back(&Sym); + const auto &SlabDigests = Snap->Slab.fileDigests(); + Snap->View.Digests.insert(SlabDigests.begin(), SlabDigests.end()); + + auto S = std::shared_ptr(std::move(Snap), &Snap->View); auto MemIdx = llvm::make_unique(); MemIdx->build(std::move(S)); return std::move(MemIdx); Index: clangd/index/Merge.cpp =================================================================== --- clangd/index/Merge.cpp +++ clangd/index/Merge.cpp @@ -74,6 +74,20 @@ Callback(*Sym); } + void fileDigests(const FileDigestRequest &Req, + llvm::function_ref + Callback) const override { + llvm::StringMap Digests; + // Query the static index first so that dynamic index can override digests + // for the common files later. 
+ Static->fileDigests(Req, + [&](StringRef U, FileDigest D) { Digests[U] = D; }); + Dynamic->fileDigests(Req, + [&](StringRef U, FileDigest D) { Digests[U] = D; }); + for (const auto &Digest : Digests) + Callback(Digest.first(), Digest.second); + } + private: const SymbolIndex *Dynamic, *Static; }; Index: clangd/index/SymbolCollector.h =================================================================== --- clangd/index/SymbolCollector.h +++ clangd/index/SymbolCollector.h @@ -11,6 +11,8 @@ #include "Index.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexSymbol.h" #include "clang/Sema/CodeCompleteConsumer.h" @@ -91,6 +93,7 @@ private: const Symbol *addDeclaration(const NamedDecl &, SymbolID); void addDefinition(const NamedDecl &, const Symbol &DeclSymbol); + void addFileDigest(StringRef URI, const SourceManager &SM, FileID FID); // All Symbols collected from the AST. SymbolSlab::Builder Symbols; @@ -108,6 +111,8 @@ // canonical by clang but should not be considered canonical in the index // unless it's a definition. llvm::DenseMap CanonicalDecls; + // URIs whose digests have been recorded. Reset on finish(). 
+ llvm::DenseSet Digested; }; } // namespace clangd Index: clangd/index/SymbolCollector.cpp =================================================================== --- clangd/index/SymbolCollector.cpp +++ clangd/index/SymbolCollector.cpp @@ -15,12 +15,14 @@ #include "../SourceCode.h" #include "../URI.h" #include "CanonicalIncludes.h" +#include "SymbolYAML.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclTemplate.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Basic/SourceManager.h" #include "clang/Index/IndexSymbol.h" #include "clang/Index/USRGeneration.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -327,17 +329,24 @@ const NamedDecl &OriginalDecl = *cast(ASTNode.OrigD); const Symbol *BasicSymbol = Symbols.find(ID); + FileID FID = SM.getFileID(findNameLoc(ND)); if (!BasicSymbol) // Regardless of role, ND is the canonical declaration. BasicSymbol = addDeclaration(*ND, std::move(ID)); - else if (isPreferredDeclaration(OriginalDecl, Roles)) + else if (isPreferredDeclaration(OriginalDecl, Roles)) { // If OriginalDecl is preferred, replace the existing canonical // declaration (e.g. a class forward declaration). There should be at most // one duplicate as we expect to see only one preferred declaration per // TU, because in practice they are definitions. 
BasicSymbol = addDeclaration(OriginalDecl, std::move(ID)); + FID = SM.getFileID(findNameLoc(&OriginalDecl)); + } + + if (!BasicSymbol->CanonicalDeclaration.FileURI.empty()) + addFileDigest(BasicSymbol->CanonicalDeclaration.FileURI, SM, FID); if (Roles & static_cast(index::SymbolRole::Definition)) addDefinition(OriginalDecl, *BasicSymbol); + return true; } @@ -386,10 +395,14 @@ S.IsIndexedForCodeCompletion = true; S.SymInfo = index::getSymbolInfoForMacro(*MI); std::string FileURI; - if (auto DeclLoc = getTokenLocation(MI->getDefinitionLoc(), SM, Opts, - PP->getLangOpts(), FileURI)) + SourceLocation MLoc = MI->getDefinitionLoc(); + if (auto DeclLoc = + getTokenLocation(MLoc, SM, Opts, PP->getLangOpts(), FileURI)) S.CanonicalDeclaration = *DeclLoc; + if (!S.CanonicalDeclaration.FileURI.empty()) + addFileDigest(S.CanonicalDeclaration.FileURI, SM, SM.getFileID(MLoc)); + CodeCompletionResult SymbolCompletion(Name); const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro( *PP, *CompletionAllocator, *CompletionTUInfo); @@ -439,6 +452,8 @@ } ReferencedDecls.clear(); ReferencedMacros.clear(); + CanonicalDecls.clear(); + Digested.clear(); } const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, @@ -514,5 +529,15 @@ Symbols.insert(S); } +void SymbolCollector::addFileDigest(StringRef URI, const SourceManager &SM, + FileID FID) { + if (FID.isInvalid() || !Digested.insert(URI).second) + return; // Ignore if file is invalid or already recorded. 
+ bool Invalid = false; + StringRef Content = SM.getBufferData(FID, &Invalid); + if (!Invalid) + Symbols.addFileDigest(URI, llvm::hash_value(Content)); +} + } // namespace clangd } // namespace clang Index: clangd/index/SymbolYAML.cpp =================================================================== --- clangd/index/SymbolYAML.cpp +++ clangd/index/SymbolYAML.cpp @@ -13,8 +13,23 @@ #include "llvm/Support/Errc.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include + +namespace clangd_yaml_internal { +struct NormalizedDigest { + llvm::StringRef URI; + size_t Digest; +}; + +struct NormalizedSymbolSlab { + std::vector Syms; + std::vector Digests; +}; +} // namespace clangd_yaml_internal LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(clang::clangd::Symbol) +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol) +LLVM_YAML_IS_SEQUENCE_VECTOR(clangd_yaml_internal::NormalizedDigest) namespace llvm { namespace yaml { @@ -22,9 +37,10 @@ using clang::clangd::Symbol; using clang::clangd::SymbolID; using clang::clangd::SymbolLocation; +using clang::clangd::SymbolSlab; using clang::index::SymbolInfo; -using clang::index::SymbolLanguage; using clang::index::SymbolKind; +using clang::index::SymbolLanguage; // Helper to (de)serialize the SymbolID. We serialize it as a hex string. struct NormalizedSymbolID { @@ -162,6 +178,21 @@ } }; +template <> struct MappingTraits { + static void mapping(IO &IO, clangd_yaml_internal::NormalizedDigest &Digest) { + IO.mapRequired("URI", Digest.URI); + IO.mapRequired("Digest", Digest.Digest); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, + clangd_yaml_internal::NormalizedSymbolSlab &NSlab) { + IO.mapRequired("Symbols", NSlab.Syms); + IO.mapRequired("FileDigests", NSlab.Digests); + } +}; + } // namespace yaml } // namespace llvm @@ -172,12 +203,14 @@ // Store data of pointer fields (excl. `StringRef`) like `Detail`. 
llvm::BumpPtrAllocator Arena; llvm::yaml::Input Yin(YAMLContent, &Arena); - std::vector S; - Yin >> S; + clangd_yaml_internal::NormalizedSymbolSlab NSlab; + Yin >> NSlab; SymbolSlab::Builder Syms; - for (auto &Sym : S) + for (auto &Sym : NSlab.Syms) Syms.insert(Sym); + for (auto &Digest : NSlab.Digests) + Syms.addFileDigest(Digest.URI, Digest.Digest); return std::move(Syms).build(); } @@ -191,8 +224,12 @@ void SymbolsToYAML(const SymbolSlab& Symbols, llvm::raw_ostream &OS) { llvm::yaml::Output Yout(OS); - for (Symbol S : Symbols) // copy: Yout<< requires mutability. - Yout << S; + clangd_yaml_internal::NormalizedSymbolSlab NSlab; + for (auto &Sym : Symbols) + NSlab.Syms.push_back(Sym); + for (auto &Digest : Symbols.fileDigests()) + NSlab.Digests.push_back({Digest.first, Digest.second}); + Yout << NSlab; } std::string SymbolToYAML(Symbol Sym) { Index: clangd/tool/ClangdMain.cpp =================================================================== --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -40,12 +40,7 @@ llvm::errs() << "Can't open " << YamlSymbolFile << "\n"; return nullptr; } - auto Slab = symbolsFromYAML(Buffer.get()->getBuffer()); - SymbolSlab::Builder SymsBuilder; - for (auto Sym : Slab) - SymsBuilder.insert(Sym); - - return MemIndex::build(std::move(SymsBuilder).build()); + return MemIndex::build(symbolsFromYAML(Buffer.get()->getBuffer())); } } // namespace Index: unittests/clangd/CodeCompleteTests.cpp =================================================================== --- unittests/clangd/CodeCompleteTests.cpp +++ unittests/clangd/CodeCompleteTests.cpp @@ -893,6 +893,11 @@ void lookup(const LookupRequest &, llvm::function_ref) const override {} + void + fileDigests(const FileDigestRequest &, + llvm::function_ref) + const override {} + const std::vector allRequests() const { return Requests; } private: Index: unittests/clangd/FileIndexTests.cpp =================================================================== --- 
unittests/clangd/FileIndexTests.cpp +++ unittests/clangd/FileIndexTests.cpp @@ -11,10 +11,13 @@ #include "TestFS.h" #include "TestTU.h" #include "index/FileIndex.h" +#include "index/Index.h" +#include "index/MemIndex.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/PCHContainerOperations.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/Hashing.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -40,9 +43,9 @@ } std::vector -getSymbolNames(const std::vector &Symbols) { +getSymbolNames(const MemIndex::SymbolSlabView &Symbols) { std::vector Names; - for (const Symbol *Sym : Symbols) + for (const Symbol *Sym : Symbols.Pointers) Names.push_back(Sym->Name); return Names; } @@ -270,6 +273,38 @@ UnorderedElementsAre("ns_in_header", "ns_in_header::func_in_header")); } +TEST(FileIndexTest, DropSymbolFromIndexedFileWithSameDigest) { + StringRef URI = "unittest:///foo.h"; + StringRef Header = "namespace ns { class Foo {}; }"; + + SymbolSlab::Builder WithSameDigest; + WithSameDigest.addFileDigest(URI, llvm::hash_value(Header)); + auto MemIdx = MemIndex::build(std::move(WithSameDigest).build()); + + FileIndex M(/*URISchemes=*/{"unittest"}, MemIdx.get()); + update(M, "foo", Header); + FuzzyFindRequest Req; + Req.Query = ""; + Req.Scopes = {"ns::"}; + EXPECT_THAT(match(M, Req), UnorderedElementsAre()); +} + +TEST(FileIndexTest, NoDropSymbolFromFileWithDifferentDigest) { + StringRef URI = "unittest:///foo.h"; + StringRef Header = "namespace ns { class Foo {}; }"; + + SymbolSlab::Builder WithDifferentDigest; + WithDifferentDigest.addFileDigest(URI, llvm::hash_value(Header) + 1); + auto MemIdx = MemIndex::build(std::move(WithDifferentDigest).build()); + + FileIndex M(/*URISchemes=*/{"unittest"}, MemIdx.get()); + update(M, "foo", Header); + FuzzyFindRequest Req; + Req.Query = ""; + Req.Scopes = {"ns::"}; + EXPECT_THAT(match(M, Req), UnorderedElementsAre("ns::Foo")); +} + } // namespace } // 
namespace clangd } // namespace clang Index: unittests/clangd/IndexTests.cpp =================================================================== --- unittests/clangd/IndexTests.cpp +++ unittests/clangd/IndexTests.cpp @@ -54,14 +54,14 @@ struct SlabAndPointers { SymbolSlab Slab; - std::vector Pointers; + MemIndex::SymbolSlabView View; }; // Create a slab of symbols with the given qualified names as both IDs and // names. The life time of the slab is managed by the returned shared pointer. // If \p WeakSymbols is provided, it will be pointed to the managed object in // the returned shared pointer. -std::shared_ptr> +std::shared_ptr generateSymbols(std::vector QualifiedNames, std::weak_ptr *WeakSymbols = nullptr) { SymbolSlab::Builder Slab; @@ -71,16 +71,15 @@ auto Storage = std::make_shared(); Storage->Slab = std::move(Slab).build(); for (const auto &Sym : Storage->Slab) - Storage->Pointers.push_back(&Sym); + Storage->View.Pointers.push_back(&Sym); if (WeakSymbols) *WeakSymbols = Storage; - auto *Pointers = &Storage->Pointers; - return {std::move(Storage), Pointers}; + return {std::move(Storage), &Storage->View}; } // Create a slab of symbols with IDs and names [Begin, End], otherwise identical // to the `generateSymbols` above. -std::shared_ptr> +std::shared_ptr generateNumSymbols(int Begin, int End, std::weak_ptr *WeakSymbols = nullptr) { std::vector Names; @@ -124,9 +123,9 @@ // Inject some duplicates and make sure we only match the same symbol once. 
auto Sym = symbol("7"); - Symbols->push_back(&Sym); - Symbols->push_back(&Sym); - Symbols->push_back(&Sym); + Symbols->Pointers.push_back(&Sym); + Symbols->Pointers.push_back(&Sym); + Symbols->Pointers.push_back(&Sym); FuzzyFindRequest Req; Req.Query = "7"; Index: unittests/clangd/SymbolCollectorTests.cpp =================================================================== --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -679,73 +679,51 @@ } TEST_F(SymbolCollectorTest, YAMLConversions) { - const std::string YAML1 = R"( ---- -ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 -Name: 'Foo1' -Scope: 'clang::' -SymInfo: - Kind: Function - Lang: Cpp -CanonicalDeclaration: - FileURI: file:///path/foo.h - Start: - Line: 1 - Column: 0 - End: - Line: 1 - Column: 1 -IsIndexedForCodeCompletion: true -Detail: - Documentation: 'Foo doc' - ReturnType: 'int' + const std::string YAML = R"(--- +Symbols: + - ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 + Name: 'Foo1' + Scope: 'clang::' + SymInfo: + Kind: Function + Lang: Cpp + CanonicalDeclaration: + FileURI: file:///path/foo.h + Start: + Line: 1 + Column: 0 + End: + Line: 1 + Column: 1 + IsIndexedForCodeCompletion: true + Detail: + Documentation: 'Foo doc' + ReturnType: 'int' +FileDigests: + - URI: 'file:///a' + Digest: 123 ... )"; - const std::string YAML2 = R"( ---- -ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858 -Name: 'Foo2' -Scope: 'clang::' -SymInfo: - Kind: Function - Lang: Cpp -CanonicalDeclaration: - FileURI: file:///path/bar.h - Start: - Line: 1 - Column: 0 - End: - Line: 1 - Column: 1 -IsIndexedForCodeCompletion: false -Signature: '-sig' -CompletionSnippetSuffix: '-snippet' -... 
-)"; - - auto Symbols1 = symbolsFromYAML(YAML1); - - EXPECT_THAT(Symbols1, - UnorderedElementsAre(AllOf(QName("clang::Foo1"), Labeled("Foo1"), - Doc("Foo doc"), ReturnType("int"), - DeclURI("file:///path/foo.h"), - ForCodeCompletion(true)))); - auto Symbols2 = symbolsFromYAML(YAML2); - EXPECT_THAT(Symbols2, UnorderedElementsAre(AllOf( - QName("clang::Foo2"), Labeled("Foo2-sig"), - Not(HasReturnType()), DeclURI("file:///path/bar.h"), - ForCodeCompletion(false)))); - - std::string ConcatenatedYAML; - { - llvm::raw_string_ostream OS(ConcatenatedYAML); - SymbolsToYAML(Symbols1, OS); - SymbolsToYAML(Symbols2, OS); - } - auto ConcatenatedSymbols = symbolsFromYAML(ConcatenatedYAML); - EXPECT_THAT(ConcatenatedSymbols, - UnorderedElementsAre(QName("clang::Foo1"), - QName("clang::Foo2"))); + auto Symbols = symbolsFromYAML(YAML); + + auto SymbolsExpected = [](const SymbolSlab &Symbols) { + EXPECT_THAT(Symbols, + UnorderedElementsAre( + AllOf(QName("clang::Foo1"), Labeled("Foo1"), Doc("Foo doc"), + ReturnType("int"), DeclURI("file:///path/foo.h"), + ForCodeCompletion(true)))); + auto D = Symbols.fileDigests().find("file:///a"); + assert(D != Symbols.fileDigests().end()); + EXPECT_EQ(D->second, 123ull); + }; + SymbolsExpected(Symbols); + + // Convert symbols to yaml again and back. + std::string Yout; + llvm::raw_string_ostream OS(Yout); + SymbolsToYAML(Symbols, OS); + OS.flush(); + SymbolsExpected(symbolsFromYAML(Yout)); } TEST_F(SymbolCollectorTest, IncludeHeaderSameAsFileURI) {