Index: clangd/index/Background.h
===================================================================
--- clangd/index/Background.h
+++ clangd/index/Background.h
@@ -87,9 +87,11 @@
 
 private:
   /// Given index results from a TU, only update files in \p FilesToUpdate.
+  /// Also stores new index information on IndexStorage.
   void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs,
               const llvm::StringMap &FilesToUpdate,
-              BackgroundIndexStorage *IndexStorage);
+              BackgroundIndexStorage *IndexStorage,
+              const IncludeStructure &Includes);
 
   // configuration
   std::string ResourceDir;
Index: clangd/index/Background.cpp
===================================================================
--- clangd/index/Background.cpp
+++ clangd/index/Background.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/SHA1.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include
 #include
 
@@ -187,7 +188,8 @@
 void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
                              RefSlab Refs,
                              const StringMap &FilesToUpdate,
-                             BackgroundIndexStorage *IndexStorage) {
+                             BackgroundIndexStorage *IndexStorage,
+                             const IncludeStructure &Includes) {
   // Partition symbols/references into files.
   struct File {
     DenseSet Symbols;
@@ -247,6 +249,14 @@
       Shard.Symbols = SS.get();
       Shard.Refs = RS.get();
       Shard.Digest = &Hash;
+      std::vector DirectIncludes;
+      for (const auto &Include : Includes.MainFileIncludes) {
+        if (auto U = URI::create(Include.Resolved, URISchemes))
+          DirectIncludes.push_back(U->toString());
+        else // Log and skip: an assert here never consumes the error.
+          elog("No URI for include {0}: {1}", Include.Resolved, U.takeError());
+      }
+      Shard.DirectIncludes = &DirectIncludes;
       if (auto Error = IndexStorage->storeShard(Path, Shard))
         elog("Failed to write background-index shard for file {0}: {1}", Path,
              std::move(Error));
@@ -359,6 +369,11 @@
   if (!Action->BeginSourceFile(*Clang, Input))
     return createStringError(inconvertibleErrorCode(),
                              "BeginSourceFile() failed");
+
+  IncludeStructure Includes;
+  Clang->getPreprocessor().addPPCallbacks(
+      collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
+
   if (!Action->Execute())
     return createStringError(inconvertibleErrorCode(), "Execute() failed");
   Action->EndSourceFile();
@@ -368,7 +383,7 @@
   SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
   SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
   update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate,
-         IndexStorage);
+         IndexStorage, Includes);
   {
     // Make sure hash for the main file is always updated even if there is no
     // index data in it.
Index: clangd/index/Serialization.h
===================================================================
--- clangd/index/Serialization.h
+++ clangd/index/Serialization.h
@@ -42,6 +42,8 @@
   llvm::Optional Refs;
   // Digest of the source file that generated the contents.
   llvm::Optional Digest;
+  // URIs of headers directly included in the source file.
+  llvm::Optional> DirectIncludes;
 };
 // Parse an index file. The input must be a RIFF or YAML file.
 llvm::Expected readIndexFile(llvm::StringRef);
@@ -52,6 +54,8 @@
   const RefSlab *Refs = nullptr;
   // Digest of the source file that generated the contents.
   const IndexFileIn::FileDigest *Digest = nullptr;
+  // URIs of headers directly included in the source file.
+  const std::vector *DirectIncludes = nullptr;
   // TODO: Support serializing Dex posting lists.
   IndexFileFormat Format = IndexFileFormat::RIFF;
 
Index: clangd/index/Serialization.cpp
===================================================================
--- clangd/index/Serialization.cpp
+++ clangd/index/Serialization.cpp
@@ -331,7 +331,9 @@
 // A file is a RIFF chunk with type 'CdIx'.
 // It contains the sections:
 //   - meta: version number
-//   - srcs: checksum of the source file
+//   - srcs: information related to source file
+//     - checksum of the source file
+//     - uris for direct includes of the source file
 //   - stri: string table
 //   - symb: symbols
 //   - refs: references to symbols
@@ -365,10 +367,17 @@
 
   IndexFileIn Result;
   if (Chunks.count("srcs")) {
-    Reader Hash(Chunks.lookup("srcs"));
+    Reader SrcsReader(Chunks.lookup("srcs"));
     Result.Digest.emplace();
-    llvm::StringRef Digest = Hash.consume(Result.Digest->size());
+    llvm::StringRef Digest = SrcsReader.consume(Result.Digest->size());
     std::copy(Digest.bytes_begin(), Digest.bytes_end(), Result.Digest->begin());
+    Result.DirectIncludes.emplace();
+    while (!SrcsReader.eof()) {
+      Result.DirectIncludes->push_back(
+          SrcsReader.consumeString(Strings->Strings));
+    }
+    if (SrcsReader.err())
+      return makeError("malformed or truncated include uri");
   }
 
   if (Chunks.count("symb")) {
@@ -407,18 +416,19 @@
   }
   RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
 
-  if (Data.Digest) {
-    llvm::StringRef Hash(reinterpret_cast(Data.Digest->data()),
-                         Data.Digest->size());
-    RIFF.Chunks.push_back({riff::fourCC("srcs"), Hash});
-  }
-
   StringTableOut Strings;
   std::vector Symbols;
   for (const auto &Sym : *Data.Symbols) {
     Symbols.emplace_back(Sym);
     visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); });
   }
+  std::vector Includes;
+  if (Data.DirectIncludes) {
+    for (const auto &Include : *Data.DirectIncludes) {
+      Includes.push_back(Include);
+      Strings.intern(Includes.back());
+    }
+  }
   std::vector>> Refs;
   if (Data.Refs) {
     for (const auto &Sym : *Data.Refs) {
@@ -456,6 +466,21 @@
     RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
   }
 
+  // There's no point in storing headers without the digest of the source file,
+  // because they are only needed when we have an up-to-date index file.
+  std::string SrcsSection;
+  if (Data.Digest) {
+    {
+      raw_string_ostream SrcsOS(SrcsSection);
+      llvm::StringRef Hash(reinterpret_cast(Data.Digest->data()),
+                           Data.Digest->size());
+      SrcsOS << Hash;
+      for (const auto &Include : Includes)
+        writeVar(Strings.index(Include), SrcsOS);
+    }
+    RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
+  }
+
   OS << RIFF;
 }
 
Index: unittests/clangd/BackgroundIndexTests.cpp
===================================================================
--- unittests/clangd/BackgroundIndexTests.cpp
+++ unittests/clangd/BackgroundIndexTests.cpp
@@ -161,5 +161,37 @@
   EXPECT_EQ(*ShardSource->Digest, Digest);
 }
 
+TEST(BackgroundIndexTest, DirectIncludesTest) {
+  MockFSProvider FS;
+  FS.Files[testPath("root/B.h")] = "";
+  FS.Files[testPath("root/A.h")] = R"cpp(
+      #include "B.h"
+      void common();
+      void f_b();
+      class A_CC {};
+      )cpp";
+  std::string A_CC = "#include \"A.h\"\nvoid g() { (void)common; }";
+  FS.Files[testPath("root/A.cc")] = A_CC;
+
+  llvm::StringMap Storage;
+  size_t CacheHits = 0;
+  MemoryShardStorage MSS(Storage, CacheHits);
+
+  tooling::CompileCommand Cmd;
+  Cmd.Filename = testPath("root/A.cc");
+  Cmd.Directory = testPath("root");
+  Cmd.CommandLine = {"clang++", testPath("root/A.cc")};
+  {
+    BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"},
+                        [&](llvm::StringRef) { return &MSS; });
+    Idx.enqueue(testPath("root"), Cmd);
+    Idx.blockUntilIdleForTest();
+  }
+
+  auto ShardSource = MSS.loadShard(testPath("root/A.cc"));
+  EXPECT_THAT(*ShardSource->DirectIncludes,
+              UnorderedElementsAre("unittest:///root/A.h"));
+}
+
 } // namespace clangd
 } // namespace clang
Index: unittests/clangd/SerializationTests.cpp
===================================================================
--- unittests/clangd/SerializationTests.cpp
+++ unittests/clangd/SerializationTests.cpp
@@ -173,7 +173,7 @@
               UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
 }
 
-TEST(SerializationTest, HashTest) {
+TEST(SerializationTest, SrcsTest) {
   auto In = readIndexFile(YAML);
   EXPECT_TRUE(bool(In)) << In.takeError();
 
@@ -185,19 +185,40 @@
   IndexFileOut Out(*In);
   Out.Format = IndexFileFormat::RIFF;
   Out.Digest = &Digest;
-  std::string Serialized = to_string(Out);
-
-  auto In2 = readIndexFile(Serialized);
-  ASSERT_TRUE(bool(In2)) << In.takeError();
-  ASSERT_EQ(In2->Digest, Digest);
-  ASSERT_TRUE(In2->Symbols);
-  ASSERT_TRUE(In2->Refs);
-
-  // Assert the YAML serializations match, for nice comparisons and diffs.
-  EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
-              UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
-  EXPECT_THAT(YAMLFromRefs(*In2->Refs),
-              UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+  {
+    std::string Serialized = to_string(Out);
+
+    auto In2 = readIndexFile(Serialized);
+    ASSERT_TRUE(bool(In2)) << In2.takeError();
+    ASSERT_EQ(*In2->Digest, Digest);
+    ASSERT_TRUE(In2->Symbols);
+    ASSERT_TRUE(In2->Refs);
+    // Assert the YAML serializations match, for nice comparisons and diffs.
+    EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
+                UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
+    EXPECT_THAT(YAMLFromRefs(*In2->Refs),
+                UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+  }
+
+  std::vector DirectIncludes = {"inc1", "inc2"};
+  Out.DirectIncludes = &DirectIncludes;
+  {
+    std::string Serialized = to_string(Out);
+
+    auto In2 = readIndexFile(Serialized);
+    ASSERT_TRUE(bool(In2)) << In2.takeError();
+    ASSERT_EQ(*In2->Digest, Digest);
+    ASSERT_TRUE(In2->Symbols);
+    ASSERT_TRUE(In2->Refs);
+    EXPECT_THAT(*In2->DirectIncludes,
+                UnorderedElementsAreArray(DirectIncludes));
+
+    // Assert the YAML serializations match, for nice comparisons and diffs.
+    EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
+                UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
+    EXPECT_THAT(YAMLFromRefs(*In2->Refs),
+                UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+  }
 }
 
 } // namespace