Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -44,7 +44,7 @@ index/Merge.cpp index/Serialization.cpp index/SymbolCollector.cpp - index/SymbolYAML.cpp + index/YAMLSerialization.cpp index/dex/Dex.cpp index/dex/Iterator.cpp Index: clangd/benchmarks/IndexBenchmark.cpp =================================================================== --- clangd/benchmarks/IndexBenchmark.cpp +++ clangd/benchmarks/IndexBenchmark.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "../index/SymbolYAML.h" +#include "../index/Serialization.h" #include "../index/dex/Dex.h" #include "benchmark/benchmark.h" #include "llvm/ADT/SmallVector.h" Index: clangd/index/Serialization.h =================================================================== --- clangd/index/Serialization.h +++ clangd/index/Serialization.h @@ -7,14 +7,18 @@ // //===----------------------------------------------------------------------===// // -// This file provides a compact binary serialization of indexed symbols. +// This file provides serialization of indexed symbols and other data. // -// It writes two sections: +// It writes sections: +// - metadata such as version info // - a string table (which is compressed) // - lists of encoded symbols // -// The format has a simple versioning scheme: the version is embedded in the -// data and non-current versions are rejected when reading. +// The format has a simple versioning scheme: the format version number is +// written in the file and non-current versions are rejected when reading. +// +// Human-readable YAML serialization is also supported, and recommended for +// debugging and experiments only. 
// //===----------------------------------------------------------------------===// @@ -23,25 +27,48 @@ #include "Index.h" #include "llvm/Support/Error.h" +namespace llvm { +namespace yaml { +class Input; +} +} // namespace llvm namespace clang { namespace clangd { +enum class IndexFileFormat { + RIFF, // Versioned binary format, suitable for production use. + YAML, // Human-readable format, suitable for experiments and debugging. +}; + // Specifies the contents of an index file to be written. struct IndexFileOut { const SymbolSlab *Symbols; // TODO: Support serializing symbol occurrences. // TODO: Support serializing Dex posting lists. + IndexFileFormat Format = IndexFileFormat::RIFF; }; // Serializes an index file. (This is a RIFF container chunk). -llvm::raw_ostream &operator<<(llvm::raw_ostream &, const IndexFileOut &); +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O); // Holds the contents of an index file that was read. struct IndexFileIn { llvm::Optional Symbols; + IndexFileFormat Format; }; // Parse an index file. The input must be a RIFF container chunk. llvm::Expected readIndexFile(llvm::StringRef); +std::string toYAML(const Symbol &); +// Returned symbol is backed by the YAML input. +// FIXME: this is only needed for IndexerMain, find a better solution. +llvm::Expected symbolFromYAML(llvm::yaml::Input &); + +// Build an in-memory static index from an index file. +// The size should be relatively small, so data can be managed in memory. 
+std::unique_ptr loadIndex(llvm::StringRef Filename, + llvm::ArrayRef URISchemes, + bool UseDex = true); + } // namespace clangd } // namespace clang Index: clangd/index/Serialization.cpp =================================================================== --- clangd/index/Serialization.cpp +++ clangd/index/Serialization.cpp @@ -9,6 +9,8 @@ #include "Serialization.h" #include "Index.h" #include "RIFF.h" +#include "Trace.h" +#include "dex/Dex.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -294,8 +296,6 @@ return Sym; } -} // namespace - // FILE ENCODING // A file is a RIFF chunk with type 'CdIx'. // It contains the sections: @@ -308,7 +308,7 @@ // data. Later we may want to support some backward compatibility. constexpr static uint32_t Version = 4; -Expected readIndexFile(StringRef Data) { +Expected readRIFF(StringRef Data) { auto RIFF = riff::readFile(Data); if (!RIFF) return RIFF.takeError(); @@ -343,7 +343,7 @@ return std::move(Result); } -raw_ostream &operator<<(raw_ostream &OS, const IndexFileOut &Data) { +void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) { assert(Data.Symbols && "An index file without symbols makes no sense!"); riff::File RIFF; RIFF.Type = riff::fourCC("CdIx"); @@ -377,7 +377,64 @@ } RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection}); - return OS << RIFF; + OS << RIFF; +} + +} // namespace + +// Defined in YAMLSerialization.cpp. 
+void writeYAML(const IndexFileOut &, raw_ostream &); +Expected readYAML(StringRef); + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) { + switch (O.Format) { + case IndexFileFormat::RIFF: + writeRIFF(O, OS); + break; + case IndexFileFormat::YAML: + writeYAML(O, OS); + break; + } + return OS; +} + +Expected readIndexFile(StringRef Data) { + if (Data.startswith("RIFF")) { + return readRIFF(Data); + } else if (auto YAMLContents = readYAML(Data)) { + return std::move(*YAMLContents); + } else { + return makeError("Not a RIFF file and failed to parse as YAML: " + + llvm::toString(YAMLContents.takeError())); + } +} + +std::unique_ptr loadIndex(llvm::StringRef SymbolFilename, + llvm::ArrayRef URISchemes, + bool UseDex) { + trace::Span OverallTracer("LoadIndex"); + auto Buffer = MemoryBuffer::getFile(SymbolFilename); + if (!Buffer) { + llvm::errs() << "Can't open " << SymbolFilename << "\n"; + return nullptr; + } + + SymbolSlab Symbols; + RefSlab Refs; + { + trace::Span Tracer("ParseIndex"); + if (auto I = readIndexFile(Buffer->get()->getBuffer())) { + if (I->Symbols) + Symbols = std::move(*I->Symbols); + } else { + llvm::errs() << "Bad Index: " << llvm::toString(I.takeError()) << "\n"; + return nullptr; + } + } + + trace::Span Tracer("BuildIndex"); + return UseDex ? dex::Dex::build(std::move(Symbols), URISchemes) + : MemIndex::build(std::move(Symbols), std::move(Refs)); } } // namespace clangd Index: clangd/index/SymbolYAML.h =================================================================== --- clangd/index/SymbolYAML.h +++ /dev/null @@ -1,54 +0,0 @@ -//===--- SymbolYAML.h --------------------------------------------*- C++-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// SymbolYAML provides facilities to convert Symbol to YAML, and vice versa. -// The YAML format of Symbol is designed for simplicity and experiment, but -// isn't a suitable/efficient store. -// -// This is for **experimental** only. Don't use it in the production code. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H - -#include "Index.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/YAMLTraits.h" -#include "llvm/Support/raw_ostream.h" - -namespace clang { -namespace clangd { - -// Read symbols from a YAML-format string. -SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent); - -// Read one symbol from a YAML-stream. -// The returned symbol is backed by Input. -Symbol SymbolFromYAML(llvm::yaml::Input &Input); - -// Convert a single symbol to YAML-format string. -// The YAML result is safe to concatenate. -std::string SymbolToYAML(Symbol Sym); - -// Convert symbols to a YAML-format string. -// The YAML result is safe to concatenate if you have multiple symbol slabs. -void SymbolsToYAML(const SymbolSlab &Symbols, llvm::raw_ostream &OS); - -// Build an in-memory static index for global symbols from a symbol file. -// The size of global symbols should be relatively small, so that all symbols -// can be managed in memory. 
-std::unique_ptr loadIndex(llvm::StringRef SymbolFilename, - llvm::ArrayRef URISchemes, - bool UseDex = true); - -} // namespace clangd -} // namespace clang - -#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_FROM_YAML_H Index: clangd/index/YAMLSerialization.cpp =================================================================== --- clangd/index/YAMLSerialization.cpp +++ clangd/index/YAMLSerialization.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#include "SymbolYAML.h" #include "Index.h" #include "Serialization.h" #include "Trace.h" @@ -16,10 +15,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Errc.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include -LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(clang::clangd::Symbol) LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol::IncludeHeaderWithReferences) namespace llvm { @@ -27,8 +26,8 @@ using clang::clangd::Symbol; using clang::clangd::SymbolID; -using clang::clangd::SymbolOrigin; using clang::clangd::SymbolLocation; +using clang::clangd::SymbolOrigin; using clang::index::SymbolInfo; using clang::index::SymbolKind; using clang::index::SymbolLanguage; @@ -186,65 +185,45 @@ namespace clang { namespace clangd { -SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent) { - llvm::yaml::Input Yin(YAMLContent); - std::vector S; - Yin >> S; - - SymbolSlab::Builder Syms; - for (auto &Sym : S) - Syms.insert(Sym); - return std::move(Syms).build(); -} - -Symbol SymbolFromYAML(llvm::yaml::Input &Input) { - Symbol S; - Input >> S; - return S; -} - -void SymbolsToYAML(const SymbolSlab &Symbols, llvm::raw_ostream &OS) { +void writeYAML(const IndexFileOut &O, raw_ostream &OS) { llvm::yaml::Output Yout(OS); - for (Symbol S : Symbols) // copy: Yout<< requires mutability. - Yout << S; + for (Symbol Sym : *O.Symbols) // copy: Yout<< requires mutability. 
+ Yout << Sym; } -std::string SymbolToYAML(Symbol Sym) { - std::string Str; - llvm::raw_string_ostream OS(Str); - llvm::yaml::Output Yout(OS); - Yout << Sym; - return OS.str(); +Expected readYAML(StringRef Data) { + SymbolSlab::Builder Symbols; + llvm::yaml::Input Yin(Data); + do { + Symbol S; + Yin >> S; + if (Yin.error()) + return llvm::errorCodeToError(Yin.error()); + Symbols.insert(S); + } while (Yin.nextDocument()); + + IndexFileIn Result; + Result.Symbols.emplace(std::move(Symbols).build()); + return std::move(Result); } -std::unique_ptr loadIndex(llvm::StringRef SymbolFilename, - llvm::ArrayRef URISchemes, - bool UseDex) { - trace::Span OverallTracer("LoadIndex"); - auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename); - if (!Buffer) { - llvm::errs() << "Can't open " << SymbolFilename << "\n"; - return nullptr; - } - StringRef Data = Buffer->get()->getBuffer(); - - llvm::Optional Slab; - if (Data.startswith("RIFF")) { // Magic for binary index file. - trace::Span Tracer("ParseRIFF"); - if (auto RIFF = readIndexFile(Data)) - Slab = std::move(RIFF->Symbols); - else - llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n"; - } else { - trace::Span Tracer("ParseYAML"); - Slab = symbolsFromYAML(Data); +std::string toYAML(const Symbol &S) { + std::string Buf; + { + llvm::raw_string_ostream OS(Buf); + llvm::yaml::Output Yout(OS); + Symbol Sym = S; // copy: Yout<< requires mutability. + Yout << Sym; } + return Buf; +} - if (!Slab) - return nullptr; - trace::Span Tracer("BuildIndex"); - return UseDex ? 
dex::Dex::build(std::move(*Slab), URISchemes) - : MemIndex::build(std::move(*Slab), RefSlab()); +Expected symbolFromYAML(llvm::yaml::Input &Yin) { + Symbol S; + Yin >> S; + if (Yin.error()) + return llvm::errorCodeToError(Yin.error()); + return S; } } // namespace clangd Index: clangd/index/dex/dexp/Dexp.cpp =================================================================== --- clangd/index/dex/dexp/Dexp.cpp +++ clangd/index/dex/dexp/Dexp.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "../../../index/SymbolYAML.h" +#include "../../Serialization.h" #include "../Dex.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -155,7 +155,7 @@ bool FoundSymbol = false; Index->lookup(Request, [&](const Symbol &Sym) { FoundSymbol = true; - llvm::outs() << SymbolToYAML(Sym); + llvm::outs() << toYAML(Sym); }); if (!FoundSymbol) llvm::outs() << "not found\n"; Index: clangd/indexer/IndexerMain.cpp =================================================================== --- clangd/indexer/IndexerMain.cpp +++ clangd/indexer/IndexerMain.cpp @@ -18,7 +18,6 @@ #include "index/Merge.h" #include "index/Serialization.h" #include "index/SymbolCollector.h" -#include "index/SymbolYAML.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Index/IndexDataConsumer.h" @@ -60,12 +59,13 @@ "MapReduce."), llvm::cl::init(true), llvm::cl::Hidden); -enum IndexFormat { YAML, Binary }; -static llvm::cl::opt Format( - "format", llvm::cl::desc("Format of the index to be written"), - llvm::cl::values(clEnumValN(YAML, "yaml", "human-readable YAML format"), - clEnumValN(Binary, "binary", "binary RIFF format")), - llvm::cl::init(YAML)); +static llvm::cl::opt + Format("format", llvm::cl::desc("Format of the index to be written"), + llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml", + "human-readable YAML format"), + clEnumValN(IndexFileFormat::RIFF, "binary", + 
"binary RIFF format")), + llvm::cl::init(IndexFileFormat::YAML)); /// Responsible for aggregating symbols from each processed file and producing /// the final results. All methods in this class must be thread-safe, @@ -162,8 +162,7 @@ void consumeSymbols(SymbolSlab Symbols) override { for (const auto &Sym : Symbols) - Executor.getExecutionContext()->reportResult(Sym.ID.str(), - SymbolToYAML(Sym)); + Executor.getExecutionContext()->reportResult(Sym.ID.str(), toYAML(Sym)); } SymbolSlab mergeResults() override { @@ -171,7 +170,7 @@ Executor.getToolResults()->forEachResult( [&](llvm::StringRef Key, llvm::StringRef Value) { llvm::yaml::Input Yin(Value); - auto Sym = clang::clangd::SymbolFromYAML(Yin); + auto Sym = cantFail(clang::clangd::symbolFromYAML(Yin)); auto ID = cantFail(clang::clangd::SymbolID::fromStr(Key)); if (const auto *Existing = UniqueSymbols.find(ID)) UniqueSymbols.insert(mergeSymbol(*Existing, Sym)); @@ -270,15 +269,9 @@ // Reduce phase: combine symbols with the same IDs. auto UniqueSymbols = Consumer->mergeResults(); // Output phase: emit result symbols. 
- switch (clang::clangd::Format) { - case clang::clangd::IndexFormat::YAML: - SymbolsToYAML(UniqueSymbols, llvm::outs()); - break; - case clang::clangd::IndexFormat::Binary: { - clang::clangd::IndexFileOut Out; - Out.Symbols = &UniqueSymbols; - llvm::outs() << Out; - } - } + clang::clangd::IndexFileOut Out; + Out.Symbols = &UniqueSymbols; + Out.Format = clang::clangd::Format; + llvm::outs() << Out; return 0; } Index: clangd/tool/ClangdMain.cpp =================================================================== --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -12,7 +12,7 @@ #include "Path.h" #include "RIFF.h" #include "Trace.h" -#include "index/SymbolYAML.h" +#include "index/Serialization.h" #include "clang/Basic/Version.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" Index: unittests/clangd/SerializationTests.cpp =================================================================== --- unittests/clangd/SerializationTests.cpp +++ unittests/clangd/SerializationTests.cpp @@ -9,18 +9,18 @@ #include "index/Index.h" #include "index/Serialization.h" -#include "index/SymbolYAML.h" #include "llvm/Support/ScopedPrinter.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +using testing::AllOf; using testing::UnorderedElementsAre; using testing::UnorderedElementsAreArray; namespace clang { namespace clangd { namespace { -const char *YAML1 = R"( +const char *YAML = R"( --- ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 Name: 'Foo1' @@ -46,9 +46,6 @@ - Header: 'include2' References: 3 ... -)"; - -const char *YAML2 = R"( --- ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858 Name: 'Foo2' @@ -70,15 +67,29 @@ ... 
)"; +MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); } MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; } MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") { return (arg.IncludeHeader == IncludeHeader) && (arg.References == References); } TEST(SerializationTest, YAMLConversions) { - auto Symbols1 = symbolsFromYAML(YAML1); - ASSERT_EQ(Symbols1.size(), 1u); - const auto &Sym1 = *Symbols1.begin(); + auto In = readIndexFile(YAML); + EXPECT_TRUE(bool(In)) << In.takeError(); + + auto ParsedYAML = readIndexFile(YAML); + ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError(); + ASSERT_TRUE(bool(ParsedYAML->Symbols)); + EXPECT_THAT( + *ParsedYAML->Symbols, + UnorderedElementsAre(ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF856"), + ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF858"))); + + auto Sym1 = *ParsedYAML->Symbols->find( + cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF856"))); + auto Sym2 = *ParsedYAML->Symbols->find( + cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF858"))); + EXPECT_THAT(Sym1, QName("clang::Foo1")); EXPECT_EQ(Sym1.Signature, ""); EXPECT_EQ(Sym1.Documentation, "Foo doc"); @@ -91,51 +102,38 @@ UnorderedElementsAre(IncludeHeaderWithRef("include1", 7u), IncludeHeaderWithRef("include2", 3u))); - auto Symbols2 = symbolsFromYAML(YAML2); - ASSERT_EQ(Symbols2.size(), 1u); - const auto &Sym2 = *Symbols2.begin(); EXPECT_THAT(Sym2, QName("clang::Foo2")); EXPECT_EQ(Sym2.Signature, "-sig"); EXPECT_EQ(Sym2.ReturnType, ""); EXPECT_EQ(Sym2.CanonicalDeclaration.FileURI, "file:///path/bar.h"); EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion); EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated); - - std::string ConcatenatedYAML; - { - llvm::raw_string_ostream OS(ConcatenatedYAML); - SymbolsToYAML(Symbols1, OS); - SymbolsToYAML(Symbols2, OS); - } - auto ConcatenatedSymbols = symbolsFromYAML(ConcatenatedYAML); - EXPECT_THAT(ConcatenatedSymbols, - 
UnorderedElementsAre(QName("clang::Foo1"), QName("clang::Foo2"))); } std::vector YAMLFromSymbols(const SymbolSlab &Slab) { std::vector Result; for (const auto &Sym : Slab) - Result.push_back(SymbolToYAML(Sym)); + Result.push_back(toYAML(Sym)); return Result; } TEST(SerializationTest, BinaryConversions) { - // We reuse the test symbols from YAML. - auto Slab = symbolsFromYAML(std::string(YAML1) + YAML2); - ASSERT_EQ(Slab.size(), 2u); + auto In = readIndexFile(YAML); + ASSERT_TRUE(bool(In)) << In.takeError(); // Write to binary format, and parse again. IndexFileOut Out; - Out.Symbols = &Slab; + Out.Symbols = In->Symbols.getPointer(); + Out.Format = IndexFileFormat::RIFF; std::string Serialized = llvm::to_string(Out); - auto In = readIndexFile(Serialized); - ASSERT_TRUE(bool(In)) << In.takeError(); - ASSERT_TRUE(In->Symbols); + auto In2 = readIndexFile(Serialized); + ASSERT_TRUE(bool(In2)) << In2.takeError(); + ASSERT_TRUE(In2->Symbols); // Assert the YAML serializations match, for nice comparisons and diffs. - EXPECT_THAT(YAMLFromSymbols(*In->Symbols), - UnorderedElementsAreArray(YAMLFromSymbols(Slab))); + EXPECT_THAT(YAMLFromSymbols(*In2->Symbols), + UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); } } // namespace Index: unittests/clangd/SymbolCollectorTests.cpp =================================================================== --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -11,7 +11,6 @@ #include "TestFS.h" #include "TestTU.h" #include "index/SymbolCollector.h" -#include "index/SymbolYAML.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/FileSystemOptions.h" #include "clang/Basic/VirtualFileSystem.h"