Index: clangd/Headers.h =================================================================== --- clangd/Headers.h +++ clangd/Headers.h @@ -48,6 +48,20 @@ }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion&); +// Contains information about one file in the build grpah and its direct +// dependencies. Doesn't own the strings it references (IncludeGraph is +// self-contained). +struct IncludeGraphNode { + // True if current file is a main file rather than a header. + bool IsTU; + llvm::StringRef URI; + FileDigest Digest; + std::vector DirectIncludes; +}; +// FileURI and FileInclusions are references to keys of the map containing +// them. +using IncludeGraph = llvm::StringMap; + // Information captured about the inclusion graph in a translation unit. // This includes detailed information about the direct #includes, and summary // information about all transitive includes. Index: clangd/SourceCode.h =================================================================== --- clangd/SourceCode.h +++ clangd/SourceCode.h @@ -16,13 +16,22 @@ #include "Protocol.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/Support/SHA1.h" namespace clang { class SourceManager; namespace clangd { +// We tend to generate digests for source codes in a lot of different places. +// This represents the type for those digests to prevent us hard coding details +// of hashing function at every place that needs to store this information. +using FileDigest = decltype(llvm::SHA1::hash({})); +FileDigest digest(StringRef Content); +Optional digestFile(const SourceManager &SM, FileID FID); + // Counts the number of UTF-16 code units needed to represent a string (LSP // specifies string lengths in UTF-16 code units). size_t lspLength(StringRef Code); Index: clangd/SourceCode.cpp =================================================================== --- clangd/SourceCode.cpp +++ clangd/SourceCode.cpp @@ -227,5 +227,17 @@ Left.end.character == Right.start.character; } +FileDigest digest(StringRef Content) { + return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()}); +} + +Optional digestFile(const SourceManager &SM, FileID FID) { + bool Invalid = false; + StringRef Content = SM.getBufferData(FID, &Invalid); + if (Invalid) + return None; + return digest(Content); +} + } // namespace clangd } // namespace clang Index: clangd/index/Background.h =================================================================== --- clangd/index/Background.h +++ clangd/index/Background.h @@ -84,11 +84,10 @@ LLVM_NODISCARD bool blockUntilIdleForTest(llvm::Optional TimeoutSeconds = 10); - using FileDigest = decltype(llvm::SHA1::hash({})); - private: /// Given index results from a TU, only update files in \p FilesToUpdate. - void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs, + /// Also stores new index information on IndexStorage. + void update(llvm::StringRef MainFile, IndexFileIn Index, const llvm::StringMap &FilesToUpdate, BackgroundIndexStorage *IndexStorage); Index: clangd/index/Background.cpp =================================================================== --- clangd/index/Background.cpp +++ clangd/index/Background.cpp @@ -11,6 +11,7 @@ #include "ClangdUnit.h" #include "Compiler.h" #include "Logger.h" +#include "SourceCode.h" #include "Threading.h" #include "Trace.h" #include "URI.h" @@ -149,19 +150,6 @@ QueueCV.notify_all(); } -static BackgroundIndex::FileDigest digest(StringRef Content) { - return SHA1::hash({(const uint8_t *)Content.data(), Content.size()}); -} - -static Optional digestFile(const SourceManager &SM, - FileID FID) { - bool Invalid = false; - StringRef Content = SM.getBufferData(FID, &Invalid); - if (Invalid) - return None; - return digest(Content); -} - // Resolves URI to file paths with cache. class URIToFileCache { public: @@ -193,8 +181,7 @@ }; /// Given index results from a TU, only update files in \p FilesToUpdate. -void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, - RefSlab Refs, +void BackgroundIndex::update(StringRef MainFile, IndexFileIn Index, const StringMap &FilesToUpdate, BackgroundIndexStorage *IndexStorage) { // Partition symbols/references into files. @@ -204,7 +191,7 @@ }; StringMap Files; URIToFileCache URICache(MainFile); - for (const auto &Sym : Symbols) { + for (const auto &Sym : *Index.Symbols) { if (Sym.CanonicalDeclaration) { auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI); if (FilesToUpdate.count(DeclPath) != 0) @@ -222,7 +209,7 @@ } } DenseMap RefToIDs; - for (const auto &SymRefs : Refs) { + for (const auto &SymRefs : *Index.Refs) { for (const auto &R : SymRefs.second) { auto Path = URICache.resolve(R.Location.FileURI); if (FilesToUpdate.count(Path) != 0) { @@ -250,12 +237,11 @@ auto Hash = FilesToUpdate.lookup(Path); // We need to store shards before updating the index, since the latter // consumes slabs. - // FIXME: Store Hash in the Shard. if (IndexStorage) { IndexFileOut Shard; Shard.Symbols = SS.get(); Shard.Refs = RS.get(); - Shard.Digest = &Hash; + if (auto Error = IndexStorage->storeShard(Path, Shard)) elog("Failed to write background-index shard for file {0}: {1}", Path, std::move(Error)); @@ -275,8 +261,8 @@ // \p FileDigests contains file digests for the current indexed files, and all // changed files will be added to \p FilesToUpdate. decltype(SymbolCollector::Options::FileFilter) createFileFilter( - const llvm::StringMap &FileDigests, - llvm::StringMap &FilesToUpdate) { + const llvm::StringMap &FileDigests, + llvm::StringMap &FilesToUpdate) { return [&FileDigests, &FilesToUpdate](const SourceManager &SM, FileID FID) { StringRef Path; if (const auto *F = SM.getFileEntryForID(FID)) @@ -375,8 +361,11 @@ Symbols.size(), Refs.numRefs()); SPAN_ATTACH(Tracer, "symbols", int(Symbols.size())); SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs())); - update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate, - IndexStorage); + IndexFileIn Index; + Index.Symbols = std::move(Symbols); + Index.Refs = std::move(Refs); + + update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage); { // Make sure hash for the main file is always updated even if there is no // index data in it. Index: clangd/index/Serialization.h =================================================================== --- clangd/index/Serialization.h +++ clangd/index/Serialization.h @@ -24,6 +24,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H +#include "Headers.h" #include "Index.h" #include "llvm/Support/Error.h" @@ -37,11 +38,10 @@ // Holds the contents of an index file that was read. struct IndexFileIn { - using FileDigest = std::array; llvm::Optional Symbols; llvm::Optional Refs; - // Digest of the source file that generated the contents. - llvm::Optional Digest; + // Keys are URIs of the source files. + llvm::Optional Sources; }; // Parse an index file. The input must be a RIFF or YAML file. llvm::Expected readIndexFile(llvm::StringRef); @@ -50,8 +50,8 @@ struct IndexFileOut { const SymbolSlab *Symbols = nullptr; const RefSlab *Refs = nullptr; - // Digest of the source file that generated the contents. - const IndexFileIn::FileDigest *Digest = nullptr; + // Keys are URIs of the source files. + const IncludeGraph *Sources = nullptr; // TODO: Support serializing Dex posting lists. IndexFileFormat Format = IndexFileFormat::RIFF; Index: clangd/index/Serialization.cpp =================================================================== --- clangd/index/Serialization.cpp +++ clangd/index/Serialization.cpp @@ -247,6 +247,31 @@ return Loc; } +IncludeGraphNode readIncludeGraphNode(Reader &Data, + llvm::ArrayRef Strings) { + IncludeGraphNode IGN; + IGN.IsTU = Data.consume8(); + IGN.URI = Data.consumeString(Strings); + llvm::StringRef Digest = Data.consume(IGN.Digest.size()); + std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin()); + IGN.DirectIncludes.resize(Data.consumeVar()); + for (llvm::StringRef &Include : IGN.DirectIncludes) + Include = Data.consumeString(Strings); + return IGN; +} + +void writeIncludeGraphNode(const IncludeGraphNode &IGN, + const StringTableOut &Strings, raw_ostream &OS) { + OS.write(IGN.IsTU); + writeVar(Strings.index(IGN.URI), OS); + llvm::StringRef Hash(reinterpret_cast(IGN.Digest.data()), + IGN.Digest.size()); + OS << Hash; + writeVar(IGN.DirectIncludes.size(), OS); + for (llvm::StringRef Include : IGN.DirectIncludes) + writeVar(Strings.index(Include), OS); +} + void writeSymbol(const Symbol &Sym, const StringTableOut &Strings, raw_ostream &OS) { OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists, @@ -333,7 +358,7 @@ // A file is a RIFF chunk with type 'CdIx'. // It contains the sections: // - meta: version number -// - srcs: checksum of the source file +// - srcs: information related to include graph // - stri: string table // - symb: symbols // - refs: references to symbols @@ -367,10 +392,20 @@ IndexFileIn Result; if (Chunks.count("srcs")) { - Reader Hash(Chunks.lookup("srcs")); - Result.Digest.emplace(); - llvm::StringRef Digest = Hash.consume(Result.Digest->size()); - std::copy(Digest.bytes_begin(), Digest.bytes_end(), Result.Digest->begin()); + Reader SrcsReader(Chunks.lookup("srcs")); + Result.Sources.emplace(); + while (!SrcsReader.eof()) { + auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings); + auto Entry = Result.Sources->try_emplace(IGN.URI).first; + Entry->getValue() = std::move(IGN); + // We change all the strings inside the structure to point at the keys in + // the map, since it is the only copy of the string that's going to live. + Entry->getValue().URI = Entry->getKey(); + for (auto &Include : Entry->getValue().DirectIncludes) + Include = Result.Sources->try_emplace(Include).first->getKey(); + } + if (SrcsReader.err()) + return makeError("malformed or truncated include uri"); } if (Chunks.count("symb")) { @@ -397,6 +432,13 @@ return std::move(Result); } +template +void visitStrings(IncludeGraphNode &IGN, const Callback &CB) { + CB(IGN.URI); + for (llvm::StringRef &Include : IGN.DirectIncludes) + CB(Include); +} + void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) { assert(Data.Symbols && "An index file without symbols makes no sense!"); riff::File RIFF; @@ -409,18 +451,19 @@ } RIFF.Chunks.push_back({riff::fourCC("meta"), Meta}); - if (Data.Digest) { - llvm::StringRef Hash(reinterpret_cast(Data.Digest->data()), - Data.Digest->size()); - RIFF.Chunks.push_back({riff::fourCC("srcs"), Hash}); - } - StringTableOut Strings; std::vector Symbols; for (const auto &Sym : *Data.Symbols) { Symbols.emplace_back(Sym); visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); }); } + std::vector Sources; + if (Data.Sources) + for (const auto &Source : *Data.Sources) { + Sources.push_back(Source.getValue()); + visitStrings(Sources.back(), [&](StringRef &S) { Strings.intern(S); }); + } + std::vector>> Refs; if (Data.Refs) { for (const auto &Sym : *Data.Refs) { @@ -458,6 +501,16 @@ RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection}); } + std::string SrcsSection; + { + { + raw_string_ostream SrcsOS(SrcsSection); + for (const auto &SF : Sources) + writeIncludeGraphNode(SF, Strings, SrcsOS); + } + RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection}); + } + OS << RIFF; } Index: unittests/clangd/BackgroundIndexTests.cpp =================================================================== --- unittests/clangd/BackgroundIndexTests.cpp +++ unittests/clangd/BackgroundIndexTests.cpp @@ -129,8 +129,6 @@ )cpp"; std::string A_CC = "#include \"A.h\"\nvoid g() { (void)common; }"; FS.Files[testPath("root/A.cc")] = A_CC; - auto Digest = llvm::SHA1::hash( - {reinterpret_cast(A_CC.data()), A_CC.size()}); llvm::StringMap Storage; size_t CacheHits = 0; @@ -165,7 +163,6 @@ EXPECT_NE(ShardSource, nullptr); EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre()); EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")})); - EXPECT_EQ(*ShardSource->Digest, Digest); } } // namespace clangd Index: unittests/clangd/SerializationTests.cpp =================================================================== --- unittests/clangd/SerializationTests.cpp +++ unittests/clangd/SerializationTests.cpp @@ -173,31 +173,44 @@ UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); } -TEST(SerializationTest, HashTest) { +TEST(SerializationTest, SrcsTest) { auto In = readIndexFile(YAML); EXPECT_TRUE(bool(In)) << In.takeError(); - std::string TestContent("TESTCONTENT"); - auto Digest = + std::string TestContent("TestContent"); + IncludeGraphNode IGN; + IGN.Digest = llvm::SHA1::hash({reinterpret_cast(TestContent.data()), TestContent.size()}); + IGN.DirectIncludes = {"inc1", "inc2"}; + IGN.URI = "URI"; + IGN.IsTU = true; + IncludeGraph Sources; + Sources[IGN.URI] = IGN; // Write to binary format, and parse again. IndexFileOut Out(*In); Out.Format = IndexFileFormat::RIFF; - Out.Digest = &Digest; - std::string Serialized = to_string(Out); - - auto In2 = readIndexFile(Serialized); - ASSERT_TRUE(bool(In2)) << In.takeError(); - ASSERT_EQ(In2->Digest, Digest); - ASSERT_TRUE(In2->Symbols); - ASSERT_TRUE(In2->Refs); - - // Assert the YAML serializations match, for nice comparisons and diffs. - EXPECT_THAT(YAMLFromSymbols(*In2->Symbols), - UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); - EXPECT_THAT(YAMLFromRefs(*In2->Refs), - UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); + Out.Sources = &Sources; + { + std::string Serialized = to_string(Out); + + auto In = readIndexFile(Serialized); + ASSERT_TRUE(bool(In)) << In.takeError(); + ASSERT_TRUE(In->Symbols); + ASSERT_TRUE(In->Refs); + ASSERT_TRUE(In->Sources); + ASSERT_TRUE(In->Sources->count(IGN.URI)); + // Assert the YAML serializations match, for nice comparisons and diffs. + EXPECT_THAT(YAMLFromSymbols(*In->Symbols), + UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); + EXPECT_THAT(YAMLFromRefs(*In->Refs), + UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); + auto IGNDeserialized = In->Sources->lookup(IGN.URI); + EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest); + EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes); + EXPECT_EQ(IGNDeserialized.URI, IGN.URI); + EXPECT_EQ(IGNDeserialized.IsTU, IGN.IsTU); + } } } // namespace