Index: clangd/Headers.h =================================================================== --- clangd/Headers.h +++ clangd/Headers.h @@ -48,6 +48,18 @@ }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion&); +// Contains information about one file in the build grpah and its direct +// dependencies. +struct BuildGraphNode { + bool IsTU; + llvm::StringRef FileURI; + FileDigest Digest; + std::vector FileInclusions; +}; +// FileURI and FileInclusions are references to keys of the map containing +// them. +using BuildGraphInclusions = llvm::StringMap; + // Information captured about the inclusion graph in a translation unit. // This includes detailed information about the direct #includes, and summary // information about all transitive includes. Index: clangd/SourceCode.h =================================================================== --- clangd/SourceCode.h +++ clangd/SourceCode.h @@ -23,6 +23,8 @@ namespace clangd { +using FileDigest = std::array; + // Counts the number of UTF-16 code units needed to represent a string (LSP // specifies string lengths in UTF-16 code units). size_t lspLength(StringRef Code); Index: clangd/index/Background.h =================================================================== --- clangd/index/Background.h +++ clangd/index/Background.h @@ -88,7 +88,8 @@ private: /// Given index results from a TU, only update files in \p FilesToUpdate. - void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs, + /// Also stores new index information on IndexStorage. + void update(llvm::StringRef MainFile, IndexFileIn Index, const llvm::StringMap &FilesToUpdate, BackgroundIndexStorage *IndexStorage); Index: clangd/index/Background.cpp =================================================================== --- clangd/index/Background.cpp +++ clangd/index/Background.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/SHA1.h" +#include "llvm/Support/ScopedPrinter.h" #include #include @@ -193,8 +194,7 @@ }; /// Given index results from a TU, only update files in \p FilesToUpdate. -void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, - RefSlab Refs, +void BackgroundIndex::update(StringRef MainFile, IndexFileIn Index, const StringMap &FilesToUpdate, BackgroundIndexStorage *IndexStorage) { // Partition symbols/references into files. @@ -204,7 +204,7 @@ }; StringMap Files; URIToFileCache URICache(MainFile); - for (const auto &Sym : Symbols) { + for (const auto &Sym : *Index.Symbols) { if (Sym.CanonicalDeclaration) { auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI); if (FilesToUpdate.count(DeclPath) != 0) @@ -222,7 +222,7 @@ } } DenseMap RefToIDs; - for (const auto &SymRefs : Refs) { + for (const auto &SymRefs : *Index.Refs) { for (const auto &R : SymRefs.second) { auto Path = URICache.resolve(R.Location.FileURI); if (FilesToUpdate.count(Path) != 0) { @@ -250,12 +250,11 @@ auto Hash = FilesToUpdate.lookup(Path); // We need to store shards before updating the index, since the latter // consumes slabs. - // FIXME: Store Hash in the Shard. if (IndexStorage) { IndexFileOut Shard; Shard.Symbols = SS.get(); Shard.Refs = RS.get(); - Shard.Digest = &Hash; + if (auto Error = IndexStorage->storeShard(Path, Shard)) elog("Failed to write background-index shard for file {0}: {1}", Path, std::move(Error)); @@ -375,8 +374,11 @@ Symbols.size(), Refs.numRefs()); SPAN_ATTACH(Tracer, "symbols", int(Symbols.size())); SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs())); - update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate, - IndexStorage); + IndexFileIn Index; + Index.Symbols = std::move(Symbols); + Index.Refs = std::move(Refs); + + update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage); { // Make sure hash for the main file is always updated even if there is no // index data in it. Index: clangd/index/Serialization.h =================================================================== --- clangd/index/Serialization.h +++ clangd/index/Serialization.h @@ -24,6 +24,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H +#include "Headers.h" #include "Index.h" #include "llvm/Support/Error.h" @@ -37,11 +38,10 @@ // Holds the contents of an index file that was read. struct IndexFileIn { - using FileDigest = std::array; llvm::Optional Symbols; llvm::Optional Refs; - // Digest of the source file that generated the contents. - llvm::Optional Digest; + // Keys are URIs of the source files. + llvm::Optional Sources; }; // Parse an index file. The input must be a RIFF or YAML file. llvm::Expected readIndexFile(llvm::StringRef); @@ -50,8 +50,8 @@ struct IndexFileOut { const SymbolSlab *Symbols = nullptr; const RefSlab *Refs = nullptr; - // Digest of the source file that generated the contents. - const IndexFileIn::FileDigest *Digest = nullptr; + // Keys are URIs of the source files. + const BuildGraphInclusions *Sources = nullptr; // TODO: Support serializing Dex posting lists. IndexFileFormat Format = IndexFileFormat::RIFF; Index: clangd/index/Serialization.cpp =================================================================== --- clangd/index/Serialization.cpp +++ clangd/index/Serialization.cpp @@ -247,6 +247,31 @@ return Loc; } +BuildGraphNode +readBuildGraphNode(Reader &Data, llvm::ArrayRef Strings) { + BuildGraphNode BGN; + BGN.IsTU = Data.consume8(); + BGN.FileURI = Data.consumeString(Strings); + llvm::StringRef Digest = Data.consume(BGN.Digest.size()); + std::copy(Digest.bytes_begin(), Digest.bytes_end(), BGN.Digest.begin()); + BGN.FileInclusions.resize(Data.consumeVar()); + for (llvm::StringRef& Include : BGN.FileInclusions) + Include = Data.consumeString(Strings); + return BGN; +} + +void writeBuildGraphNode(const BuildGraphNode &BGN, + const StringTableOut &Strings, raw_ostream &OS) { + OS.write(BGN.IsTU); + writeVar(Strings.index(BGN.FileURI), OS); + llvm::StringRef Hash(reinterpret_cast(BGN.Digest.data()), + BGN.Digest.size()); + OS << Hash; + writeVar(BGN.FileInclusions.size(), OS); + for (llvm::StringRef Include : BGN.FileInclusions) + writeVar(Strings.index(Include), OS); +} + void writeSymbol(const Symbol &Sym, const StringTableOut &Strings, raw_ostream &OS) { OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists, @@ -333,7 +358,7 @@ // A file is a RIFF chunk with type 'CdIx'. // It contains the sections: // - meta: version number -// - srcs: checksum of the source file +// - srcs: information related to build graph // - stri: string table // - symb: symbols // - refs: references to symbols @@ -367,10 +392,20 @@ IndexFileIn Result; if (Chunks.count("srcs")) { - Reader Hash(Chunks.lookup("srcs")); - Result.Digest.emplace(); - llvm::StringRef Digest = Hash.consume(Result.Digest->size()); - std::copy(Digest.bytes_begin(), Digest.bytes_end(), Result.Digest->begin()); + Reader SrcsReader(Chunks.lookup("srcs")); + Result.Sources.emplace(); + while (!SrcsReader.eof()) { + auto BGN = readBuildGraphNode(SrcsReader, Strings->Strings); + auto Entry = Result.Sources->try_emplace(BGN.FileURI).first; + Entry->getValue() = std::move(BGN); + // We change all the strings inside the structure to point at the keys in + // the map, since it is the only copy of the string that's going to live. + Entry->getValue().FileURI = Entry->getKey(); + for(auto &Include : Entry->getValue().FileInclusions) + Include = Result.Sources->try_emplace(Include).first->getKey(); + } + if (SrcsReader.err()) + return makeError("malformed or truncated include uri"); } if (Chunks.count("symb")) { @@ -397,6 +432,13 @@ return std::move(Result); } +template +void visitStrings(BuildGraphNode &BGN, const Callback &CB) { + CB(BGN.FileURI); + for (llvm::StringRef &Include : BGN.FileInclusions) + CB(Include); +} + void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) { assert(Data.Symbols && "An index file without symbols makes no sense!"); riff::File RIFF; @@ -409,18 +451,19 @@ } RIFF.Chunks.push_back({riff::fourCC("meta"), Meta}); - if (Data.Digest) { - llvm::StringRef Hash(reinterpret_cast(Data.Digest->data()), - Data.Digest->size()); - RIFF.Chunks.push_back({riff::fourCC("srcs"), Hash}); - } - StringTableOut Strings; std::vector Symbols; for (const auto &Sym : *Data.Symbols) { Symbols.emplace_back(Sym); visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); }); } + std::vector Sources; + if (Data.Sources) + for (const auto &Source : *Data.Sources) { + Sources.push_back(Source.getValue()); + visitStrings(Sources.back(), [&](StringRef &S) { Strings.intern(S); }); + } + std::vector>> Refs; if (Data.Refs) { for (const auto &Sym : *Data.Refs) { @@ -458,6 +501,16 @@ RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection}); } + std::string SrcsSection; + { + { + raw_string_ostream SrcsOS(SrcsSection); + for (const auto &SF : Sources) + writeBuildGraphNode(SF, Strings, SrcsOS); + } + RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection}); + } + OS << RIFF; } Index: unittests/clangd/BackgroundIndexTests.cpp =================================================================== --- unittests/clangd/BackgroundIndexTests.cpp +++ unittests/clangd/BackgroundIndexTests.cpp @@ -124,8 +124,6 @@ )cpp"; std::string A_CC = "#include \"A.h\"\nvoid g() { (void)common; }"; FS.Files[testPath("root/A.cc")] = A_CC; - auto Digest = llvm::SHA1::hash( - {reinterpret_cast(A_CC.data()), A_CC.size()}); llvm::StringMap Storage; size_t CacheHits = 0; @@ -160,7 +158,6 @@ EXPECT_NE(ShardSource, nullptr); EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre()); EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")})); - EXPECT_EQ(*ShardSource->Digest, Digest); } } // namespace clangd Index: unittests/clangd/SerializationTests.cpp =================================================================== --- unittests/clangd/SerializationTests.cpp +++ unittests/clangd/SerializationTests.cpp @@ -173,31 +173,44 @@ UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); } -TEST(SerializationTest, HashTest) { +TEST(SerializationTest, SrcsTest) { auto In = readIndexFile(YAML); EXPECT_TRUE(bool(In)) << In.takeError(); - std::string TestContent("TESTCONTENT"); - auto Digest = + std::string TestContent("TestContent"); + BuildGraphNode BGN; + BGN.Digest = llvm::SHA1::hash({reinterpret_cast(TestContent.data()), TestContent.size()}); + BGN.FileInclusions = {"inc1", "inc2"}; + BGN.FileURI = "FileURI"; + BGN.IsTU = true; + BuildGraphInclusions Sources; + Sources[BGN.FileURI] = BGN; // Write to binary format, and parse again. IndexFileOut Out(*In); Out.Format = IndexFileFormat::RIFF; - Out.Digest = &Digest; - std::string Serialized = to_string(Out); - - auto In2 = readIndexFile(Serialized); - ASSERT_TRUE(bool(In2)) << In.takeError(); - ASSERT_EQ(In2->Digest, Digest); - ASSERT_TRUE(In2->Symbols); - ASSERT_TRUE(In2->Refs); - - // Assert the YAML serializations match, for nice comparisons and diffs. - EXPECT_THAT(YAMLFromSymbols(*In2->Symbols), - UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); - EXPECT_THAT(YAMLFromRefs(*In2->Refs), - UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); + Out.Sources = &Sources; + { + std::string Serialized = to_string(Out); + + auto In = readIndexFile(Serialized); + ASSERT_TRUE(bool(In)) << In.takeError(); + ASSERT_TRUE(In->Symbols); + ASSERT_TRUE(In->Refs); + ASSERT_TRUE(In->Sources); + ASSERT_TRUE(In->Sources->count(BGN.FileURI)); + // Assert the YAML serializations match, for nice comparisons and diffs. + EXPECT_THAT(YAMLFromSymbols(*In->Symbols), + UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); + EXPECT_THAT(YAMLFromRefs(*In->Refs), + UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); + auto BGNDeserialized = In->Sources->lookup(BGN.FileURI); + EXPECT_EQ(BGNDeserialized.Digest, BGN.Digest); + EXPECT_EQ(BGNDeserialized.FileInclusions, BGN.FileInclusions); + EXPECT_EQ(BGNDeserialized.FileURI, BGN.FileURI); + EXPECT_EQ(BGNDeserialized.IsTU, BGN.IsTU); + } } } // namespace