diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h
--- a/clang-tools-extra/clangd/index/Serialization.h
+++ b/clang-tools-extra/clangd/index/Serialization.h
@@ -41,6 +41,7 @@
 struct IndexFileIn {
   llvm::Optional<SymbolSlab> Symbols;
   llvm::Optional<RefSlab> Refs;
+  llvm::Optional<RelationSlab> Relations;
   // Keys are URIs of the source files.
   llvm::Optional<IncludeGraph> Sources;
 };
@@ -51,6 +52,7 @@
 struct IndexFileOut {
   const SymbolSlab *Symbols = nullptr;
   const RefSlab *Refs = nullptr;
+  const RelationSlab *Relations = nullptr;
   // Keys are URIs of the source files.
   const IncludeGraph *Sources = nullptr;
   // TODO: Support serializing Dex posting lists.
@@ -59,7 +61,8 @@
   IndexFileOut() = default;
   IndexFileOut(const IndexFileIn &I)
       : Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr),
-        Refs(I.Refs ? I.Refs.getPointer() : nullptr) {}
+        Refs(I.Refs ? I.Refs.getPointer() : nullptr),
+        Relations(I.Relations ? I.Relations.getPointer() : nullptr) {}
 };
 // Serializes an index file.
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
@@ -67,12 +70,18 @@
 // Convert a single symbol to YAML, a nice debug representation.
 std::string toYAML(const Symbol &);
 std::string toYAML(const std::pair<SymbolID, ArrayRef<Ref>> &);
+std::string toYAML(const Relation &);
 
 // Build an in-memory static index from an index file.
 // The size should be relatively small, so data can be managed in memory.
 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef Filename,
                                        bool UseDex = true);
 
+// Used for serializing SymbolRole as used in Relation.
+enum class RelationKind : uint8_t { ChildOf = 1, BaseOf };
+llvm::Expected<RelationKind> symbolRoleToRelationKind(index::SymbolRole);
+llvm::Expected<index::SymbolRole> relationKindToSymbolRole(RelationKind);
+
 } // namespace clangd
 } // namespace clang
 
diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp
--- a/clang-tools-extra/clangd/index/Serialization.cpp
+++ b/clang-tools-extra/clangd/index/Serialization.cpp
@@ -24,6 +24,37 @@
   return llvm::make_error<llvm::StringError>(Msg,
                                              llvm::inconvertibleErrorCode());
 }
+} // namespace
+
+llvm::Expected<RelationKind> symbolRoleToRelationKind(index::SymbolRole Role) {
+  // SymbolRole is used to record relations in the index.
+  // Only handle the relations we actually store currently.
+  // If we start storing more relations, this list can be expanded.
+  switch (Role) {
+  case index::SymbolRole::RelationChildOf: {
+    return RelationKind::ChildOf;
+  }
+  case index::SymbolRole::RelationBaseOf: {
+    return RelationKind::BaseOf;
+  }
+  default:
+    return makeError("invalid relation kind");
+  }
+}
+
+llvm::Expected<index::SymbolRole> relationKindToSymbolRole(RelationKind Kind) {
+  switch (Kind) {
+  case RelationKind::ChildOf: {
+    return index::SymbolRole::RelationChildOf;
+  }
+  case RelationKind::BaseOf: {
+    return index::SymbolRole::RelationBaseOf;
+  }
+  }
+  return makeError("invalid relation kind");
+}
+
+namespace {
 
 // IO PRIMITIVES
 // We use little-endian 32 bit ints, sometimes with variable-length encoding.
@@ -44,6 +75,8 @@
   // The "error" bit is set by reading past EOF or reading invalid data.
   // When in an error state, reads may return zero values: callers should check.
   bool err() const { return Err; }
+  // A caller can set the error bit if an invalid value was read.
+  void setErr() { Err = true; }
   // Did we read all the data, or encounter an error?
   bool eof() const { return Begin == End || Err; }
   // All the data we didn't read yet.
@@ -358,6 +391,32 @@
   return Result;
 }
 
+// RELATIONS ENCODING
+// A relations section is a flat list of relations. Each relation has:
+//  - SymbolID (subject): 8 bytes
+//  - relation kind (predicate): 1 byte
+//  - SymbolID (object): 8 bytes
+
+void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
+  OS << R.Subject.raw();
+  RelationKind Kind = cantFail(symbolRoleToRelationKind(R.Predicate));
+  OS.write(static_cast<uint8_t>(Kind));
+  OS << R.Object.raw();
+}
+
+Relation readRelation(Reader &Data) {
+  SymbolID Subject = Data.consumeID();
+  index::SymbolRole Predicate{};
+  // expectedToOptional() consumes the Error; an unchecked one would abort.
+  if (auto Role = llvm::expectedToOptional(relationKindToSymbolRole(
+          static_cast<RelationKind>(Data.consume8()))))
+    Predicate = *Role;
+  else
+    Data.setErr();
+  SymbolID Object = Data.consumeID();
+  return {Subject, Predicate, Object};
+}
+
 // FILE ENCODING
 // A file is a RIFF chunk with type 'CdIx'.
 // It contains the sections:
@@ -434,6 +493,17 @@
       return makeError("malformed or truncated refs");
     Result.Refs = std::move(Refs).build();
   }
+  if (Chunks.count("rela")) {
+    Reader RelationsReader(Chunks.lookup("rela"));
+    RelationSlab::Builder Relations;
+    while (!RelationsReader.eof()) {
+      auto Relation = readRelation(RelationsReader);
+      Relations.insert(Relation);
+    }
+    if (RelationsReader.err())
+      return makeError("malformed or truncated relations");
+    Result.Relations = std::move(Relations).build();
+  }
   return std::move(Result);
 }
 
@@ -483,6 +553,14 @@
     }
   }
 
+  std::vector<Relation> Relations;
+  if (Data.Relations) {
+    for (const auto &Relation : *Data.Relations) {
+      Relations.emplace_back(Relation);
+      // No strings to be interned in relations.
+    }
+  }
+
   std::string StringSection;
   {
     llvm::raw_string_ostream StringOS(StringSection);
@@ -508,6 +586,16 @@
     RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
   }
 
+  std::string RelationSection;
+  if (Data.Relations) {
+    {
+      llvm::raw_string_ostream RelationOS{RelationSection};
+      for (const auto &Relation : Relations)
+        writeRelation(Relation, RelationOS);
+    }
+    RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
+  }
+
   std::string SrcsSection;
   {
     {
@@ -561,6 +649,7 @@
 
   SymbolSlab Symbols;
   RefSlab Refs;
+  RelationSlab Relations;
   {
     trace::Span Tracer("ParseIndex");
     if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
@@ -568,6 +657,8 @@
         Symbols = std::move(*I->Symbols);
       if (I->Refs)
         Refs = std::move(*I->Refs);
+      if (I->Relations)
+        Relations = std::move(*I->Relations);
     } else {
       llvm::errs() << "Bad Index: " << llvm::toString(I.takeError()) << "\n";
       return nullptr;
@@ -576,15 +667,17 @@
 
   size_t NumSym = Symbols.size();
   size_t NumRefs = Refs.numRefs();
+  size_t NumRelations = Relations.size();
 
   trace::Span Tracer("BuildIndex");
   auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs))
                       : MemIndex::build(std::move(Symbols), std::move(Refs));
   vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
        "  - number of symbols: {3}\n"
-       "  - number of refs: {4}\n",
+       "  - number of refs: {4}\n"
+       "  - number of relations: {5}",
        UseDex ? "Dex" : "MemIndex", SymbolFilename,
-       Index->estimateMemoryUsage(), NumSym, NumRefs);
+       Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
   return Index;
 }
 
diff --git a/clang-tools-extra/clangd/index/YAMLSerialization.cpp b/clang-tools-extra/clangd/index/YAMLSerialization.cpp
--- a/clang-tools-extra/clangd/index/YAMLSerialization.cpp
+++ b/clang-tools-extra/clangd/index/YAMLSerialization.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Index.h"
+#include "Relation.h"
 #include "Serialization.h"
 #include "SymbolLocation.h"
 #include "SymbolOrigin.h"
@@ -35,10 +36,11 @@
 namespace {
 using RefBundle =
     std::pair<clang::clangd::SymbolID, std::vector<clang::clangd::Ref>>;
-// This is a pale imitation of std::variant<Symbol, RefBundle>
+// This is a pale imitation of std::variant<Symbol, RefBundle, Relation>
 struct VariantEntry {
   llvm::Optional<clang::clangd::Symbol> Symbol;
   llvm::Optional<RefBundle> Refs;
+  llvm::Optional<clang::clangd::Relation> Relation;
 };
 // A class helps YAML to serialize the 32-bit encoded position (Line&Column),
 // as YAMLIO can't directly map bitfields.
@@ -53,6 +55,8 @@
 
 using clang::clangd::Ref;
 using clang::clangd::RefKind;
+using clang::clangd::Relation;
+using clang::clangd::RelationKind;
 using clang::clangd::Symbol;
 using clang::clangd::SymbolID;
 using clang::clangd::SymbolLocation;
@@ -60,6 +64,7 @@
 using clang::index::SymbolInfo;
 using clang::index::SymbolKind;
 using clang::index::SymbolLanguage;
+using clang::index::SymbolRole;
 
 // Helper to (de)serialize the SymbolID. We serialize it as a hex string.
struct NormalizedSymbolID { @@ -275,6 +280,47 @@ } }; +struct NormalizedSymbolRole { + NormalizedSymbolRole(IO &) {} + NormalizedSymbolRole(IO &IO, SymbolRole R) { + auto K = clang::clangd::symbolRoleToRelationKind(R); + if (K) { + Kind = static_cast(*K); + } else { + IO.setError("invalid relation kind"); + } + } + + SymbolRole denormalize(IO &IO) { + auto R = clang::clangd::relationKindToSymbolRole( + static_cast(Kind)); + if (R) { + return *R; + } + IO.setError("invalid relation kind"); + return SymbolRole(); + } + + uint8_t Kind = 0; +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, SymbolID &ID) { + MappingNormalization NSymbolID(IO, ID); + IO.mapRequired("ID", NSymbolID->HexString); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, Relation &Relation) { + MappingNormalization NRole( + IO, Relation.Predicate); + IO.mapRequired("Subject", Relation.Subject); + IO.mapRequired("Predicate", NRole->Kind); + IO.mapRequired("Object", Relation.Object); + } +}; + template <> struct MappingTraits { static void mapping(IO &IO, VariantEntry &Variant) { if (IO.mapTag("!Symbol", Variant.Symbol.hasValue())) { @@ -285,6 +331,10 @@ if (!IO.outputting()) Variant.Refs.emplace(); MappingTraits::mapping(IO, *Variant.Refs); + } else if (IO.mapTag("!Relations", Variant.Relation.hasValue())) { + if (!IO.outputting()) + Variant.Relation.emplace(); + MappingTraits::mapping(IO, *Variant.Relation); } } }; @@ -308,11 +358,18 @@ Entry.Refs = Sym; Yout << Entry; } + if (O.Relations) + for (auto &R : *O.Relations) { + VariantEntry Entry; + Entry.Relation = R; + Yout << Entry; + } } llvm::Expected readYAML(llvm::StringRef Data) { SymbolSlab::Builder Symbols; RefSlab::Builder Refs; + RelationSlab::Builder Relations; llvm::BumpPtrAllocator Arena; // store the underlying data of Position::FileURI. 
llvm::UniqueStringSaver Strings(Arena); @@ -329,12 +386,15 @@ if (Variant.Refs) for (const auto &Ref : Variant.Refs->second) Refs.insert(Variant.Refs->first, Ref); + if (Variant.Relation) + Relations.insert(*Variant.Relation); Yin.nextDocument(); } IndexFileIn Result; Result.Symbols.emplace(std::move(Symbols).build()); Result.Refs.emplace(std::move(Refs).build()); + Result.Relations.emplace(std::move(Relations).build()); return std::move(Result); } @@ -360,5 +420,16 @@ return Buf; } +std::string toYAML(const Relation &R) { + std::string Buf; + { + llvm::raw_string_ostream OS(Buf); + llvm::yaml::Output Yout(OS); + Relation Rel = R; // copy: Yout<< requires mutability. + Yout << Rel; + } + return Buf; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp --- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp +++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp @@ -149,8 +149,15 @@ } std::vector YAMLFromRefs(const RefSlab &Slab) { std::vector Result; - for (const auto &Sym : Slab) - Result.push_back(toYAML(Sym)); + for (const auto &Refs : Slab) + Result.push_back(toYAML(Refs)); + return Result; +} + +std::vector YAMLFromRelations(const RelationSlab &Slab) { + std::vector Result; + for (const auto &Rel : Slab) + Result.push_back(toYAML(Rel)); return Result; } @@ -215,6 +222,110 @@ } } +const char *RelationsYAML = R"( +--- !Symbol +ID: 6481EE7AF2841756 +Name: Base +Scope: '' +SymInfo: + Kind: Struct + Lang: C +CanonicalDeclaration: + FileURI: 'file:///path/foo.cc' + Start: + Line: 0 + Column: 7 + End: + Line: 0 + Column: 11 +Definition: + FileURI: 'file:///path/foo.cc' + Start: + Line: 0 + Column: 7 + End: + Line: 0 + Column: 11 +References: 1 +Origin: 4 +Flags: 0 +Signature: '' +TemplateSpecializationArgs: '' +CompletionSnippetSuffix: '' +Documentation: '' +ReturnType: '' +Type: '' +... 
+--- !Symbol +ID: 6512AEC512EA3A2D +Name: Derived +Scope: '' +SymInfo: + Kind: Struct + Lang: Cpp +CanonicalDeclaration: + FileURI: 'file:///path/foo.cc' + Start: + Line: 1 + Column: 7 + End: + Line: 1 + Column: 14 +Definition: + FileURI: 'file:///path/foo.cc' + Start: + Line: 1 + Column: 7 + End: + Line: 1 + Column: 14 +Origin: 4 +Flags: 0 +Signature: '' +TemplateSpecializationArgs: '' +CompletionSnippetSuffix: '' +Documentation: '' +ReturnType: '' +Type: '' +... +--- !Relations +Subject: + ID: 6481EE7AF2841756 +Predicate: 2 +Object: + ID: 6512AEC512EA3A2D +... +)"; + +TEST(SerializationTest, Relations) { + auto In = readIndexFile(RelationsYAML); + + EXPECT_TRUE(bool(In)) << In.takeError(); + + SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756")); + SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D")); + + EXPECT_TRUE(bool(In->Relations)); + EXPECT_THAT(*In->Relations, + UnorderedElementsAre( + Relation{Base, index::SymbolRole::RelationBaseOf, Derived})); + + // Write to binary format, and parse again. + IndexFileOut Out(*In); + Out.Format = IndexFileFormat::RIFF; + std::string Serialized = llvm::to_string(Out); + + auto In2 = readIndexFile(Serialized); + ASSERT_TRUE(bool(In2)) << In.takeError(); + ASSERT_TRUE(In2->Symbols); + ASSERT_TRUE(In2->Refs); + ASSERT_TRUE(In2->Relations); + + // Assert the YAML serializations match, for nice comparisons and diffs. + EXPECT_THAT(YAMLFromRelations(*In2->Relations), + UnorderedElementsAreArray(YAMLFromRelations(*In->Relations))); +} + } // namespace } // namespace clangd } // namespace clang