Index: clangd/index/Background.h =================================================================== --- clangd/index/Background.h +++ clangd/index/Background.h @@ -87,7 +87,8 @@ private: /// Given index results from a TU, only update files in \p FilesToUpdate. - void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs, + /// Also stores new index information on IndexStorage. + void update(llvm::StringRef MainFile, IndexFileIn Index, const llvm::StringMap &FilesToUpdate, BackgroundIndexStorage *IndexStorage); Index: clangd/index/Background.cpp =================================================================== --- clangd/index/Background.cpp +++ clangd/index/Background.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/SHA1.h" +#include "llvm/Support/ScopedPrinter.h" #include #include @@ -183,9 +184,43 @@ llvm::StringMap URIToPathCache; }; +llvm::StringMap +FilterSources(llvm::StringRef Path, + const llvm::StringMap& FullSources, + const std::vector& URISchemes) { + llvm::StringMap Filtered; + + std::string FileURI; + if (auto U = URI::create(Path, URISchemes)) + FileURI = U->toString(); + else { + elog("Failed to convert {0} into URI.", Path); + return Filtered; + } + std::queue Dependencies; + Dependencies.push(FileURI); + while(!Dependencies.empty()) { + llvm::StringRef Dependency = Dependencies.front(); + Dependencies.pop(); + + auto Entry = Filtered.try_emplace(Dependency).first; + auto &SF = Entry->getValue(); + SF = FullSources.lookup(Entry->getKey()); + SF.FileURI = Entry->getKey(); + + for (auto &Include : SF.DirectIncludes) { + auto I = Filtered.try_emplace(Include); + Include = I.first->getKey(); + if (I.second) // Insert each dependency only once. + Dependencies.push(Include); + } + } + + return Filtered; +} + /// Given index results from a TU, only update files in \p FilesToUpdate. -void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, - RefSlab Refs, +void BackgroundIndex::update(StringRef MainFile, IndexFileIn Index, const StringMap &FilesToUpdate, BackgroundIndexStorage *IndexStorage) { // Partition symbols/references into files. @@ -195,7 +230,7 @@ }; StringMap Files; URIToFileCache URICache(MainFile); - for (const auto &Sym : Symbols) { + for (const auto &Sym : *Index.Symbols) { if (Sym.CanonicalDeclaration) { auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI); if (FilesToUpdate.count(DeclPath) != 0) @@ -213,7 +248,7 @@ } } DenseMap RefToIDs; - for (const auto &SymRefs : Refs) { + for (const auto &SymRefs : *Index.Refs) { for (const auto &R : SymRefs.second) { auto Path = URICache.resolve(R.Location.FileURI); if (FilesToUpdate.count(Path) != 0) { @@ -237,16 +272,21 @@ auto SS = llvm::make_unique(std::move(Syms).build()); auto RS = llvm::make_unique(std::move(Refs).build()); + std::unique_ptr> Srcs; auto Hash = FilesToUpdate.lookup(Path); // We need to store shards before updating the index, since the latter // consumes slabs. - // FIXME: Store Hash in the Shard. if (IndexStorage) { IndexFileOut Shard; Shard.Symbols = SS.get(); Shard.Refs = RS.get(); - Shard.Digest = &Hash; + if (Index.Sources) { + Srcs = llvm::make_unique>( + FilterSources(Path, Index.Sources.getValue(), URISchemes)); + Shard.Sources = Srcs.get(); + } + if (auto Error = IndexStorage->storeShard(Path, Shard)) elog("Failed to write background-index shard for file {0}: {1}", Path, std::move(Error)); @@ -293,6 +333,55 @@ }; } +llvm::StringMap +IncludeStructureToSources(llvm::StringRef MainFile, + const IncludeStructure &Includes, + const std::vector &URISchemes, + llvm::IntrusiveRefCntPtr FS) { + llvm::StringMap Sources; + + std::queue Dependencies; + Dependencies.push(MainFile); + while(!Dependencies.empty()) { + std::string Dependency = std::move(Dependencies.front()); + Dependencies.pop(); + + std::string FileURI; + if (auto U = URI::create(Dependency, URISchemes)) + FileURI = U->toString(); + else { + elog("Failed to convert {0} into URI.", Dependency); + continue; + } + auto Buf = FS->getBufferForFile(Dependency); + if (!Buf) + continue; + auto Digest = digest(Buf->get()->getBuffer()); + + auto Entry = Sources.try_emplace(FileURI).first; + auto &SF = Entry->getValue(); + SF.IsTU = Dependency == MainFile; + SF.FileURI = Entry->getKey(); + SF.Digest = std::move(Digest); + for (const auto &Include : Includes.includeDepth(Dependency)) { + if (Include.getValue() != 1) // Skip transitive includes. + continue; + if (auto U = URI::create(Include.getKey(), URISchemes)) + FileURI = U->toString(); + else { + elog("Failed to convert {0} into URI.", Dependency); + continue; + } + auto I = Sources.try_emplace(FileURI); + SF.DirectIncludes.push_back(I.first->getKey()); + if (I.second) // Insert each dependency only once. + Dependencies.push(Include.getKey()); + } + } + + return Sources; +} + Error BackgroundIndex::index(tooling::CompileCommand Cmd, BackgroundIndexStorage *IndexStorage) { trace::Span Tracer("BackgroundIndex"); @@ -359,16 +448,28 @@ if (!Action->BeginSourceFile(*Clang, Input)) return createStringError(inconvertibleErrorCode(), "BeginSourceFile() failed"); + + IncludeStructure Includes; + Clang->getPreprocessor().addPPCallbacks( + collectIncludeStructureCallback(Clang->getSourceManager(), &Includes)); + if (!Action->Execute()) return createStringError(inconvertibleErrorCode(), "Execute() failed"); Action->EndSourceFile(); + llvm::StringMap Sources = + IncludeStructureToSources(AbsolutePath, Includes, URISchemes, Inputs.FS); + log("Indexed {0} ({1} symbols, {2} refs)", Inputs.CompileCommand.Filename, Symbols.size(), Refs.numRefs()); SPAN_ATTACH(Tracer, "symbols", int(Symbols.size())); SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs())); - update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate, - IndexStorage); + IndexFileIn Index; + Index.Symbols = std::move(Symbols); + Index.Refs = std::move(Refs); + Index.Sources = std::move(Sources); + + update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage); { // Make sure hash for the main file is always updated even if there is no // index data in it. Index: clangd/index/Serialization.h =================================================================== --- clangd/index/Serialization.h +++ clangd/index/Serialization.h @@ -38,10 +38,19 @@ // Holds the contents of an index file that was read. struct IndexFileIn { using FileDigest = std::array; + // Contains information about one source file that participates in current + // index file. + struct SourceFile { + bool IsTU; + llvm::StringRef FileURI; + FileDigest Digest; + std::vector DirectIncludes; + }; + llvm::Optional Symbols; llvm::Optional Refs; - // Digest of the source file that generated the contents. - llvm::Optional Digest; + // Keys are URIs of the source files. + llvm::Optional> Sources; }; // Parse an index file. The input must be a RIFF or YAML file. llvm::Expected readIndexFile(llvm::StringRef); @@ -50,8 +59,9 @@ struct IndexFileOut { const SymbolSlab *Symbols = nullptr; const RefSlab *Refs = nullptr; - // Digest of the source file that generated the contents. - const IndexFileIn::FileDigest *Digest = nullptr; + // Information about the source files participated in current index file. + // Keys are URIs of the source files. + const llvm::StringMap *Sources = nullptr; // TODO: Support serializing Dex posting lists. IndexFileFormat Format = IndexFileFormat::RIFF; Index: clangd/index/Serialization.cpp =================================================================== --- clangd/index/Serialization.cpp +++ clangd/index/Serialization.cpp @@ -247,6 +247,32 @@ return Loc; } +IndexFileIn::SourceFile +readSourceFile(Reader &Data, llvm::ArrayRef Strings) { + IndexFileIn::SourceFile SF; + SF.IsTU = Data.consume8(); + SF.FileURI = Data.consumeString(Strings); + llvm::StringRef Digest = Data.consume(SF.Digest.size()); + std::copy(Digest.bytes_begin(), Digest.bytes_end(), SF.Digest.begin()); + SF.DirectIncludes.resize(Data.consumeVar()); + + for (llvm::StringRef& Include : SF.DirectIncludes) + Include = Data.consumeString(Strings); + return SF; +} + +void writeSourceFile(const IndexFileIn::SourceFile &SF, + const StringTableOut &Strings, raw_ostream &OS) { + OS.write(SF.IsTU); + writeVar(Strings.index(SF.FileURI), OS); + llvm::StringRef Hash(reinterpret_cast(SF.Digest.data()), + SF.Digest.size()); + OS << Hash; + writeVar(SF.DirectIncludes.size(), OS); + for (llvm::StringRef Include : SF.DirectIncludes) + writeVar(Strings.index(Include), OS); +} + void writeSymbol(const Symbol &Sym, const StringTableOut &Strings, raw_ostream &OS) { OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists, @@ -331,7 +357,7 @@ // A file is a RIFF chunk with type 'CdIx'. // It contains the sections: // - meta: version number -// - srcs: checksum of the source file +// - srcs: information related to source files // - stri: string table // - symb: symbols // - refs: references to symbols @@ -365,10 +391,18 @@ IndexFileIn Result; if (Chunks.count("srcs")) { - Reader Hash(Chunks.lookup("srcs")); - Result.Digest.emplace(); - llvm::StringRef Digest = Hash.consume(Result.Digest->size()); - std::copy(Digest.bytes_begin(), Digest.bytes_end(), Result.Digest->begin()); + Reader SrcsReader(Chunks.lookup("srcs")); + Result.Sources.emplace(); + while (!SrcsReader.eof()) { + auto SF = readSourceFile(SrcsReader, Strings->Strings); + auto Entry = Result.Sources->try_emplace(SF.FileURI).first; + Entry->getValue() = std::move(SF); + Entry->getValue().FileURI = Entry->getKey(); + for(auto &Include : Entry->getValue().DirectIncludes) + Include = Result.Sources->try_emplace(Include).first->getKey(); + } + if (SrcsReader.err()) + return makeError("malformed or truncated include uri"); } if (Chunks.count("symb")) { @@ -395,6 +429,13 @@ return std::move(Result); } +template +void visitStrings(IndexFileIn::SourceFile &SF, const Callback &CB) { + CB(SF.FileURI); + for (llvm::StringRef &Include : SF.DirectIncludes) + CB(Include); +} + void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) { assert(Data.Symbols && "An index file without symbols makes no sense!"); riff::File RIFF; @@ -407,18 +448,19 @@ } RIFF.Chunks.push_back({riff::fourCC("meta"), Meta}); - if (Data.Digest) { - llvm::StringRef Hash(reinterpret_cast(Data.Digest->data()), - Data.Digest->size()); - RIFF.Chunks.push_back({riff::fourCC("srcs"), Hash}); - } - StringTableOut Strings; std::vector Symbols; for (const auto &Sym : *Data.Symbols) { Symbols.emplace_back(Sym); visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); }); } + std::vector Sources; + if (Data.Sources) + for (const auto &Source : *Data.Sources) { + Sources.push_back(Source.getValue()); + visitStrings(Sources.back(), [&](StringRef &S) { Strings.intern(S); }); + } + std::vector>> Refs; if (Data.Refs) { for (const auto &Sym : *Data.Refs) { @@ -456,6 +498,16 @@ RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection}); } + std::string SrcsSection; + { + { + raw_string_ostream SrcsOS(SrcsSection); + for (const auto &SF : Sources) + writeSourceFile(SF, Strings, SrcsOS); + } + RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection}); + } + OS << RIFF; } Index: unittests/clangd/BackgroundIndexTests.cpp =================================================================== --- unittests/clangd/BackgroundIndexTests.cpp +++ unittests/clangd/BackgroundIndexTests.cpp @@ -158,7 +158,50 @@ EXPECT_NE(ShardSource, nullptr); EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre()); EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")})); - EXPECT_EQ(*ShardSource->Digest, Digest); + EXPECT_EQ(ShardSource->Sources->lookup("unittest:///root/A.cc").Digest, + Digest); +} + +TEST(BackgroundIndexTest, DirectIncludesTest) { + MockFSProvider FS; + FS.Files[testPath("root/B.h")] = ""; + FS.Files[testPath("root/A.h")] = R"cpp( + #include "B.h" + void common(); + void f_b(); + class A_CC {}; + )cpp"; + std::string A_CC = "#include \"A.h\"\nvoid g() { (void)common; }"; + FS.Files[testPath("root/A.cc")] = A_CC; + + llvm::StringMap Storage; + size_t CacheHits = 0; + MemoryShardStorage MSS(Storage, CacheHits); + + tooling::CompileCommand Cmd; + Cmd.Filename = testPath("root/A.cc"); + Cmd.Directory = testPath("root"); + Cmd.CommandLine = {"clang++", testPath("root/A.cc")}; + { + BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"}, + [&](llvm::StringRef) { return &MSS; }); + Idx.enqueue(testPath("root"), Cmd); + Idx.blockUntilIdleForTest(); + } + + auto ShardSource = MSS.loadShard(testPath("root/A.cc")); + EXPECT_TRUE(ShardSource->Sources); + EXPECT_EQ(ShardSource->Sources->size(), 3U); // A.cc, A.h, B.h + EXPECT_THAT( + ShardSource->Sources->lookup("unittest:///root/A.cc").DirectIncludes, + UnorderedElementsAre("unittest:///root/A.h")); + + auto ShardHeader = MSS.loadShard(testPath("root/A.h")); + EXPECT_TRUE(ShardHeader->Sources); + EXPECT_EQ(ShardHeader->Sources->size(), 2U); // A.h B.h + EXPECT_THAT( + ShardHeader->Sources->lookup("unittest:///root/A.h").DirectIncludes, + UnorderedElementsAre("unittest:///root/B.h")); } } // namespace clangd Index: unittests/clangd/SerializationTests.cpp =================================================================== --- unittests/clangd/SerializationTests.cpp +++ unittests/clangd/SerializationTests.cpp @@ -173,31 +173,44 @@ UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); } -TEST(SerializationTest, HashTest) { +TEST(SerializationTest, SrcsTest) { auto In = readIndexFile(YAML); EXPECT_TRUE(bool(In)) << In.takeError(); - std::string TestContent("TESTCONTENT"); - auto Digest = + std::string TestContent("TestContent"); + IndexFileIn::SourceFile SF; + SF.Digest = llvm::SHA1::hash({reinterpret_cast(TestContent.data()), TestContent.size()}); + SF.DirectIncludes = {"inc1", "inc2"}; + SF.FileURI = "FileURI"; + SF.IsTU = true; + llvm::StringMap Sources; + Sources[SF.FileURI] = SF; // Write to binary format, and parse again. IndexFileOut Out(*In); Out.Format = IndexFileFormat::RIFF; - Out.Digest = &Digest; - std::string Serialized = to_string(Out); - - auto In2 = readIndexFile(Serialized); - ASSERT_TRUE(bool(In2)) << In.takeError(); - ASSERT_EQ(In2->Digest, Digest); - ASSERT_TRUE(In2->Symbols); - ASSERT_TRUE(In2->Refs); - - // Assert the YAML serializations match, for nice comparisons and diffs. - EXPECT_THAT(YAMLFromSymbols(*In2->Symbols), - UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); - EXPECT_THAT(YAMLFromRefs(*In2->Refs), - UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); + Out.Sources = &Sources; + { + std::string Serialized = to_string(Out); + + auto In = readIndexFile(Serialized); + ASSERT_TRUE(bool(In)) << In.takeError(); + ASSERT_TRUE(In->Symbols); + ASSERT_TRUE(In->Refs); + ASSERT_TRUE(In->Sources); + ASSERT_TRUE(In->Sources->count(SF.FileURI)); + // Assert the YAML serializations match, for nice comparisons and diffs. + EXPECT_THAT(YAMLFromSymbols(*In->Symbols), + UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); + EXPECT_THAT(YAMLFromRefs(*In->Refs), + UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); + auto SFDeserialized = In->Sources->lookup(SF.FileURI); + EXPECT_EQ(SFDeserialized.Digest, SF.Digest); + EXPECT_EQ(SFDeserialized.DirectIncludes, SF.DirectIncludes); + EXPECT_EQ(SFDeserialized.FileURI, SF.FileURI); + EXPECT_EQ(SFDeserialized.IsTU, SF.IsTU); + } } } // namespace