Index: clangd/index/IndexAction.h =================================================================== --- clangd/index/IndexAction.h +++ clangd/index/IndexAction.h @@ -21,10 +21,12 @@ // Only a subset of SymbolCollector::Options are respected: // - include paths are always collected, and canonicalized appropriately // - references are always counted +// - all refs are collected (if RefsCallback is non-null) // - the symbol origin is always Static std::unique_ptr createStaticIndexingAction(SymbolCollector::Options Opts, - std::function SymbolsCallback); + std::function SymbolsCallback, + std::function RefsCallback); } // namespace clangd } // namespace clang Index: clangd/index/IndexAction.cpp =================================================================== --- clangd/index/IndexAction.cpp +++ clangd/index/IndexAction.cpp @@ -13,10 +13,11 @@ IndexAction(std::shared_ptr C, std::unique_ptr Includes, const index::IndexingOptions &Opts, - std::function &SymbolsCallback) + std::function SymbolsCallback, + std::function RefsCallback) : WrapperFrontendAction(index::createIndexingAction(C, Opts, nullptr)), - SymbolsCallback(SymbolsCallback), Collector(C), - Includes(std::move(Includes)), + SymbolsCallback(SymbolsCallback), RefsCallback(RefsCallback), + Collector(C), Includes(std::move(Includes)), PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {} std::unique_ptr CreateASTConsumer(CompilerInstance &CI, @@ -41,10 +42,13 @@ return; } SymbolsCallback(Collector->takeSymbols()); + if (RefsCallback != nullptr) + RefsCallback(Collector->takeRefs()); } private: std::function SymbolsCallback; + std::function RefsCallback; std::shared_ptr Collector; std::unique_ptr Includes; std::unique_ptr PragmaHandler; @@ -54,19 +58,22 @@ std::unique_ptr createStaticIndexingAction(SymbolCollector::Options Opts, - std::function SymbolsCallback) { + std::function SymbolsCallback, + std::function RefsCallback) { index::IndexingOptions IndexOpts; IndexOpts.SystemSymbolFilter = index::IndexingOptions::SystemSymbolFilterKind::All; Opts.CollectIncludePath = true; Opts.CountReferences = true; Opts.Origin = SymbolOrigin::Static; + if (RefsCallback != nullptr) + Opts.RefFilter = RefKind::All; auto Includes = llvm::make_unique(); addSystemHeadersMapping(Includes.get()); Opts.Includes = Includes.get(); return llvm::make_unique( std::make_shared(std::move(Opts)), std::move(Includes), - IndexOpts, SymbolsCallback); + IndexOpts, SymbolsCallback, RefsCallback); }; } // namespace clangd Index: clangd/index/Serialization.h =================================================================== --- clangd/index/Serialization.h +++ clangd/index/Serialization.h @@ -43,25 +43,29 @@ // Holds the contents of an index file that was read. struct IndexFileIn { llvm::Optional Symbols; + llvm::Optional Refs; }; -// Parse an index file. The input must be a RIFF container chunk. +// Parse an index file. The input must be a RIFF or YAML file. llvm::Expected readIndexFile(llvm::StringRef); // Specifies the contents of an index file to be written. struct IndexFileOut { - const SymbolSlab *Symbols; - // TODO: Support serializing symbol occurrences. + const SymbolSlab *Symbols = nullptr; + const RefSlab *Refs = nullptr; // TODO: Support serializing Dex posting lists. IndexFileFormat Format = IndexFileFormat::RIFF; IndexFileOut() = default; IndexFileOut(const IndexFileIn &I) - : Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr) {} + : Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr), + Refs(I.Refs ? I.Refs.getPointer() : nullptr) {} }; // Serializes an index file. llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O); std::string toYAML(const Symbol &); +std::string toYAML(const std::pair> &); + // Returned symbol is backed by the YAML input. // FIXME: this is only needed for IndexerMain, find a better solution. llvm::Expected symbolFromYAML(llvm::yaml::Input &); Index: clangd/index/Serialization.cpp =================================================================== --- clangd/index/Serialization.cpp +++ clangd/index/Serialization.cpp @@ -296,6 +296,35 @@ return Sym; } +// REFS ENCODING +// A refs section has data grouped by Symbol. Each symbol has: +// - SymbolID: 20 bytes +// - NumRefs: varint +// - Ref[NumRefs] +// Fields of Ref are encoded in turn, see implementation. + +void writeRefs(const SymbolID &ID, ArrayRef Refs, + const StringTableOut &Strings, raw_ostream &OS) { + OS << ID.raw(); + writeVar(Refs.size(), OS); + for (const auto& Ref : Refs) { + OS.write(static_cast(Ref.Kind)); + writeLocation(Ref.Location, Strings, OS); + } +} + +std::pair> readRefs(Reader &Data, + ArrayRef Strings) { + std::pair> Result; + Result.first = Data.consumeID(); + Result.second.resize(Data.consumeVar()); + for (auto& Ref : Result.second) { + Ref.Kind = static_cast(Data.consume8()); + Ref.Location= readLocation(Data, Strings); + } + return Result; +} + // FILE ENCODING // A file is a RIFF chunk with type 'CdIx'. // It contains the sections: @@ -306,7 +335,7 @@ // The current versioning scheme is simple - non-current versions are rejected. // If you make a breaking change, bump this version number to invalidate stored // data. Later we may want to support some backward compatibility. -constexpr static uint32_t Version = 4; +constexpr static uint32_t Version = 5; Expected readRIFF(StringRef Data) { auto RIFF = riff::readFile(Data); @@ -340,6 +369,18 @@ return makeError("malformed or truncated symbol"); Result.Symbols = std::move(Symbols).build(); } + if (Chunks.count("refs")) { + Reader RefsReader(Chunks.lookup("refs")); + RefSlab::Builder Refs; + while (!RefsReader.eof()) { + auto RefsBundle = readRefs(RefsReader, Strings->Strings); + for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert? + Refs.insert(RefsBundle.first, Ref); + } + if (RefsReader.err()) + return makeError("malformed or truncated refs"); + Result.Refs = std::move(Refs).build(); + } return std::move(Result); } @@ -361,6 +402,14 @@ Symbols.emplace_back(Sym); visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); }); } + std::vector>> Refs; + if (Data.Refs) { + for (const auto &Sym : *Data.Refs) { + Refs.emplace_back(Sym); + for (auto &Ref : Refs.back().second) + Strings.intern(Ref.Location.FileURI); + } + } std::string StringSection; { @@ -377,6 +426,16 @@ } RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection}); + std::string RefsSection; + if (Data.Refs) { + { + raw_string_ostream RefsOS(RefsSection); + for (const auto &Sym : Refs) + writeRefs(Sym.first, Sym.second, Strings, RefsOS); + } + RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection}); + } + OS << RIFF; } @@ -426,6 +485,8 @@ if (auto I = readIndexFile(Buffer->get()->getBuffer())) { if (I->Symbols) Symbols = std::move(*I->Symbols); + if (I->Refs) + Refs = std::move(*I->Refs); } else { llvm::errs() << "Bad Index: " << llvm::toString(I.takeError()) << "\n"; return nullptr; Index: clangd/index/YAMLSerialization.cpp =================================================================== --- clangd/index/YAMLSerialization.cpp +++ clangd/index/YAMLSerialization.cpp @@ -6,6 +6,12 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// A YAML index file is a sequence of tagged entries. +// Each entry either encodes a Symbol or the list of references to a symbol +// (a "ref bundle"). +// +//===----------------------------------------------------------------------===// #include "Index.h" #include "Serialization.h" @@ -20,10 +26,22 @@ #include LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol::IncludeHeaderWithReferences) +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Ref) +namespace { +using RefBundle = + std::pair>; +// This is a pale imitation of std::variant +struct VariantEntry { + llvm::Optional Symbol; + llvm::Optional Refs; +}; +} namespace llvm { namespace yaml { +using clang::clangd::Ref; +using clang::clangd::RefKind; using clang::clangd::Symbol; using clang::clangd::SymbolID; using clang::clangd::SymbolLocation; @@ -179,6 +197,46 @@ } }; +template <> struct MappingTraits { + static void mapping(IO &IO, RefBundle &Refs) { + MappingNormalization NSymbolID(IO, + Refs.first); + IO.mapRequired("ID", NSymbolID->HexString); + IO.mapRequired("References", Refs.second); + } +}; + +struct NormalizedRefKind { + NormalizedRefKind(IO &) {} + NormalizedRefKind(IO &, RefKind O) { Kind = static_cast(O); } + + RefKind denormalize(IO &) { return static_cast(Kind); } + + uint8_t Kind = 0; +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, Ref &R) { + MappingNormalization NKind(IO, R.Kind); + IO.mapRequired("Kind", NKind->Kind); + IO.mapRequired("Location", R.Location); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, VariantEntry &Variant) { + if (IO.mapTag("!Symbol", Variant.Symbol.hasValue())) { + if (!IO.outputting()) + Variant.Symbol.emplace(); + MappingTraits::mapping(IO, *Variant.Symbol); + } else if (IO.mapTag("!Refs", Variant.Refs.hasValue())) { + if (!IO.outputting()) + Variant.Refs.emplace(); + MappingTraits::mapping(IO, *Variant.Refs); + } + } +}; + } // namespace yaml } // namespace llvm @@ -187,23 +245,38 @@ void writeYAML(const IndexFileOut &O, raw_ostream &OS) { llvm::yaml::Output Yout(OS); - for (Symbol Sym : *O.Symbols) // copy: Yout<< requires mutability. - Yout << Sym; + for (const auto& Sym : *O.Symbols) { + VariantEntry Entry; + Entry.Symbol = Sym; + Yout << Entry; + } + if (O.Refs) + for (auto& Sym : *O.Refs) { + VariantEntry Entry; + Entry.Refs = Sym; + Yout << Entry; + } } Expected readYAML(StringRef Data) { SymbolSlab::Builder Symbols; + RefSlab::Builder Refs; llvm::yaml::Input Yin(Data); do { - Symbol S; - Yin >> S; + VariantEntry Variant; + Yin >> Variant; if (Yin.error()) return llvm::errorCodeToError(Yin.error()); - Symbols.insert(S); + if (Variant.Symbol) + Symbols.insert(*Variant.Symbol); + if (Variant.Refs) + for (const auto& Ref : Variant.Refs->second) + Refs.insert(Variant.Refs->first, Ref); } while (Yin.nextDocument()); IndexFileIn Result; Result.Symbols.emplace(std::move(Symbols).build()); + Result.Refs.emplace(std::move(Refs).build()); return std::move(Result); } @@ -218,6 +291,17 @@ return Buf; } +std::string toYAML(const std::pair> &Data) { + RefBundle Refs = {Data.first, Data.second}; + std::string Buf; + { + llvm::raw_string_ostream OS(Buf); + llvm::yaml::Output Yout(OS); + Yout << Refs; + } + return Buf; +} + Expected symbolFromYAML(llvm::yaml::Input &Yin) { Symbol S; Yin >> S; Index: clangd/indexer/IndexerMain.cpp =================================================================== --- clangd/indexer/IndexerMain.cpp +++ clangd/indexer/IndexerMain.cpp @@ -77,9 +77,10 @@ /// Consume a SymbolSlab build for a file. virtual void consumeSymbols(SymbolSlab Symbols) = 0; + virtual void consumeRefs(RefSlab Refs) = 0; /// Produce a resulting symbol slab, by combining occurrences of the same /// symbols across translation units. - virtual SymbolSlab mergeResults() = 0; + virtual std::pair mergeResults() = 0; }; class SymbolIndexActionFactory : public tooling::FrontendActionFactory { @@ -91,7 +92,8 @@ CollectorOpts.FallbackDir = AssumedHeaderDir; return createStaticIndexingAction( CollectorOpts, - [&](SymbolSlab S) { Consumer.consumeSymbols(std::move(S)); }) + [&](SymbolSlab S) { Consumer.consumeSymbols(std::move(S)); }, + [&](RefSlab S) { Consumer.consumeRefs(std::move(S)); }) .release(); } @@ -109,8 +111,9 @@ for (const auto &Sym : Symbols) Executor.getExecutionContext()->reportResult(Sym.ID.str(), toYAML(Sym)); } + void consumeRefs(RefSlab) override {} - SymbolSlab mergeResults() override { + std::pair mergeResults() override { SymbolSlab::Builder UniqueSymbols; Executor.getToolResults()->forEachResult( [&](llvm::StringRef Key, llvm::StringRef Value) { @@ -122,7 +125,7 @@ else UniqueSymbols.insert(Sym); }); - return std::move(UniqueSymbols).build(); + return {std::move(UniqueSymbols).build(), RefSlab()}; } private: @@ -136,21 +139,29 @@ void consumeSymbols(SymbolSlab Symbols) override { std::lock_guard Lock(Mut); for (auto &&Sym : Symbols) { - if (const auto *Existing = Result.find(Sym.ID)) - Result.insert(mergeSymbol(*Existing, Sym)); + if (const auto *Existing = this->Symbols.find(Sym.ID)) + this->Symbols.insert(mergeSymbol(*Existing, Sym)); else - Result.insert(Sym); + this->Symbols.insert(Sym); + } + } + void consumeRefs(RefSlab Refs) override { + std::lock_guard Lock(Mut); + for (const auto &Sym : Refs) { + for (const auto& Ref : Sym.second) + this->Refs.insert(Sym.first, Ref); } } - SymbolSlab mergeResults() override { + std::pair mergeResults() override { std::lock_guard Lock(Mut); - return std::move(Result).build(); + return {std::move(Symbols).build(), std::move(Refs).build()}; } private: std::mutex Mut; - SymbolSlab::Builder Result; + SymbolSlab::Builder Symbols; + RefSlab::Builder Refs; }; } // namespace @@ -215,7 +226,8 @@ auto UniqueSymbols = Consumer->mergeResults(); // Output phase: emit result symbols. clang::clangd::IndexFileOut Out; - Out.Symbols = &UniqueSymbols; + Out.Symbols = &UniqueSymbols.first; + Out.Refs = &UniqueSymbols.second; Out.Format = clang::clangd::Format; llvm::outs() << Out; return 0; Index: unittests/clangd/SerializationTests.cpp =================================================================== --- unittests/clangd/SerializationTests.cpp +++ unittests/clangd/SerializationTests.cpp @@ -13,7 +13,9 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +using testing::_; using testing::AllOf; +using testing::Pair; using testing::UnorderedElementsAre; using testing::UnorderedElementsAreArray; namespace clang { @@ -22,6 +24,7 @@ const char *YAML = R"( --- +!Symbol ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 Name: 'Foo1' Scope: 'clang::' @@ -47,6 +50,7 @@ References: 3 ... --- +!Symbol ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858 Name: 'Foo2' Scope: 'clang::' @@ -65,6 +69,18 @@ Signature: '-sig' CompletionSnippetSuffix: '-snippet' ... +!Refs +ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 +References: + - Kind: 4 + Location: + FileURI: file:///path/foo.cc + Start: + Line: 5 + Column: 3 + End: + Line: 5 + Column: 8 )"; MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); } @@ -108,6 +124,16 @@ EXPECT_EQ(Sym2.CanonicalDeclaration.FileURI, "file:///path/bar.h"); EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion); EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated); + + ASSERT_TRUE(bool(ParsedYAML->Refs)); + EXPECT_THAT(*ParsedYAML->Refs, + UnorderedElementsAre( + Pair(cantFail(SymbolID::fromStr( + "057557CEBF6E6B2DD437FBF60CC58F352D1DF856")), + testing::SizeIs(1)))); + auto Ref1 = ParsedYAML->Refs->begin()->second.front(); + EXPECT_EQ(Ref1.Kind, RefKind::Reference); + EXPECT_EQ(Ref1.Location.FileURI, "file:///path/foo.cc"); } std::vector YAMLFromSymbols(const SymbolSlab &Slab) { @@ -116,24 +142,38 @@ Result.push_back(toYAML(Sym)); return Result; } +std::vector YAMLFromRefs(const RefSlab &Slab) { + std::vector Result; + for (const auto &Sym : Slab) + Result.push_back(toYAML(Sym)); + return Result; +} + TEST(SerializationTest, BinaryConversions) { auto In = readIndexFile(YAML); EXPECT_TRUE(bool(In)) << In.takeError(); // Write to binary format, and parse again. - IndexFileOut Out; - Out.Symbols = In->Symbols.getPointer(); + IndexFileOut Out(*In); Out.Format = IndexFileFormat::RIFF; std::string Serialized = llvm::to_string(Out); + { + std::error_code EC; + llvm::raw_fd_ostream F("/tmp/foo",EC); + F << Serialized; + } auto In2 = readIndexFile(Serialized); ASSERT_TRUE(bool(In2)) << In.takeError(); - ASSERT_TRUE(In->Symbols); + ASSERT_TRUE(In2->Symbols); + ASSERT_TRUE(In2->Refs); // Assert the YAML serializations match, for nice comparisons and diffs. EXPECT_THAT(YAMLFromSymbols(*In2->Symbols), UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); + EXPECT_THAT(YAMLFromRefs(*In2->Refs), + UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); } } // namespace