Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -63,6 +63,7 @@ index/MemIndex.cpp index/Merge.cpp index/Serialization.cpp + index/Symbol.cpp index/SymbolCollector.cpp index/SymbolID.cpp index/SymbolLocation.cpp Index: clangd/CodeComplete.h =================================================================== --- clangd/CodeComplete.h +++ clangd/CodeComplete.h @@ -21,6 +21,7 @@ #include "Path.h" #include "Protocol.h" #include "index/Index.h" +#include "index/Symbol.h" #include "index/SymbolOrigin.h" #include "clang/Frontend/PrecompiledPreamble.h" #include "clang/Sema/CodeCompleteConsumer.h" Index: clangd/CodeComplete.cpp =================================================================== --- clangd/CodeComplete.cpp +++ clangd/CodeComplete.cpp @@ -34,6 +34,7 @@ #include "Trace.h" #include "URI.h" #include "index/Index.h" +#include "index/Symbol.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/Basic/LangOptions.h" Index: clangd/Headers.h =================================================================== --- clangd/Headers.h +++ clangd/Headers.h @@ -12,7 +12,7 @@ #include "Path.h" #include "Protocol.h" #include "SourceCode.h" -#include "index/Index.h" +#include "index/Symbol.h" #include "clang/Format/Format.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/PPCallbacks.h" Index: clangd/IncludeFixer.h =================================================================== --- clangd/IncludeFixer.h +++ clangd/IncludeFixer.h @@ -12,6 +12,7 @@ #include "Diagnostics.h" #include "Headers.h" #include "index/Index.h" +#include "index/Symbol.h" #include "clang/AST/Type.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceLocation.h" Index: clangd/IncludeFixer.cpp =================================================================== --- clangd/IncludeFixer.cpp +++ clangd/IncludeFixer.cpp @@ -13,6 +13,7 @@ #include "SourceCode.h" 
#include "Trace.h" #include "index/Index.h" +#include "index/Symbol.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/AST/NestedNameSpecifier.h" Index: clangd/index/Background.cpp =================================================================== --- clangd/index/Background.cpp +++ clangd/index/Background.cpp @@ -11,6 +11,7 @@ #include "Compiler.h" #include "Logger.h" #include "SourceCode.h" +#include "Symbol.h" #include "Threading.h" #include "Trace.h" #include "URI.h" Index: clangd/index/FileIndex.h =================================================================== --- clangd/index/FileIndex.h +++ clangd/index/FileIndex.h @@ -20,6 +20,7 @@ #include "MemIndex.h" #include "Merge.h" #include "index/CanonicalIncludes.h" +#include "index/Symbol.h" #include "clang/Lex/Preprocessor.h" #include Index: clangd/index/Index.h =================================================================== --- clangd/index/Index.h +++ clangd/index/Index.h @@ -10,9 +10,9 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H #include "ExpectedTypes.h" +#include "Symbol.h" #include "SymbolID.h" #include "SymbolLocation.h" -#include "SymbolOrigin.h" #include "clang/Index/IndexSymbol.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/DenseMap.h" @@ -22,7 +22,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/JSON.h" -#include "llvm/Support/StringSaver.h" #include #include #include @@ -32,207 +31,6 @@ namespace clang { namespace clangd { -// The class presents a C++ symbol, e.g. class, function. -// -// WARNING: Symbols do not own much of their underlying data - typically strings -// are owned by a SymbolSlab. They should be treated as non-owning references. -// Copies are shallow. -// When adding new unowned data fields to Symbol, remember to update: -// - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage. -// - mergeSymbol in Merge.cpp, to properly combine two Symbols. 
-// -// A fully documented symbol can be split as: -// size_type std::map::count(const K& key) const -// | Return | Scope |Name| Signature | -// We split up these components to allow display flexibility later. -struct Symbol { - // The ID of the symbol. - SymbolID ID; - // The symbol information, like symbol kind. - index::SymbolInfo SymInfo; - // The unqualified name of the symbol, e.g. "bar" (for ns::bar). - llvm::StringRef Name; - // The containing namespace. e.g. "" (global), "ns::" (top-level namespace). - llvm::StringRef Scope; - // The location of the symbol's definition, if one was found. - // This just covers the symbol name (e.g. without class/function body). - SymbolLocation Definition; - // The location of the preferred declaration of the symbol. - // This just covers the symbol name. - // This may be the same as Definition. - // - // A C++ symbol may have multiple declarations, and we pick one to prefer. - // * For classes, the canonical declaration should be the definition. - // * For non-inline functions, the canonical declaration typically appears - // in the ".h" file corresponding to the definition. - SymbolLocation CanonicalDeclaration; - // The number of translation units that reference this symbol from their main - // file. This number is only meaningful if aggregated in an index. - unsigned References = 0; - /// Where this symbol came from. Usually an index provides a constant value. - SymbolOrigin Origin = SymbolOrigin::Unknown; - /// A brief description of the symbol that can be appended in the completion - /// candidate list. For example, "(X x, Y y) const" is a function signature. - /// Only set when the symbol is indexed for completion. - llvm::StringRef Signature; - /// What to insert when completing this symbol, after the symbol name. - /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function). - /// (When snippets are disabled, the symbol name alone is used). - /// Only set when the symbol is indexed for completion. 
- llvm::StringRef CompletionSnippetSuffix; - /// Documentation including comment for the symbol declaration. - llvm::StringRef Documentation; - /// Type when this symbol is used in an expression. (Short display form). - /// e.g. return type of a function, or type of a variable. - /// Only set when the symbol is indexed for completion. - llvm::StringRef ReturnType; - - /// Raw representation of the OpaqueType of the symbol, used for scoring - /// purposes. - /// Only set when the symbol is indexed for completion. - llvm::StringRef Type; - - struct IncludeHeaderWithReferences { - IncludeHeaderWithReferences() = default; - - IncludeHeaderWithReferences(llvm::StringRef IncludeHeader, - unsigned References) - : IncludeHeader(IncludeHeader), References(References) {} - - /// This can be either a URI of the header to be #include'd - /// for this symbol, or a literal header quoted with <> or "" that is - /// suitable to be included directly. When it is a URI, the exact #include - /// path needs to be calculated according to the URI scheme. - /// - /// Note that the include header is a canonical include for the symbol and - /// can be different from FileURI in the CanonicalDeclaration. - llvm::StringRef IncludeHeader = ""; - /// The number of translation units that reference this symbol and include - /// this header. This number is only meaningful if aggregated in an index. - unsigned References = 0; - }; - /// One Symbol can potentially be incuded via different headers. - /// - If we haven't seen a definition, this covers all declarations. - /// - If we have seen a definition, this covers declarations visible from - /// any definition. - /// Only set when the symbol is indexed for completion. - llvm::SmallVector IncludeHeaders; - - enum SymbolFlag : uint8_t { - None = 0, - /// Whether or not this symbol is meant to be used for the code completion. - /// See also isIndexedForCodeCompletion(). 
- /// Note that we don't store completion information (signature, snippet, - /// type, inclues) if the symbol is not indexed for code completion. - IndexedForCodeCompletion = 1 << 0, - /// Indicates if the symbol is deprecated. - Deprecated = 1 << 1, - // Symbol is an implementation detail. - ImplementationDetail = 1 << 2, - // Symbol is visible to other files (not e.g. a static helper function). - VisibleOutsideFile = 1 << 3, - }; - - SymbolFlag Flags = SymbolFlag::None; - /// FIXME: also add deprecation message and fixit? -}; -inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A, Symbol::SymbolFlag B) { - return static_cast(static_cast(A) | - static_cast(B)); -} -inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A, Symbol::SymbolFlag B) { - return A = A | B; -} -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S); -raw_ostream &operator<<(raw_ostream &, Symbol::SymbolFlag); - -// Invokes Callback with each StringRef& contained in the Symbol. -// Useful for deduplicating backing strings. -template void visitStrings(Symbol &S, const Callback &CB) { - CB(S.Name); - CB(S.Scope); - CB(S.Signature); - CB(S.CompletionSnippetSuffix); - CB(S.Documentation); - CB(S.ReturnType); - CB(S.Type); - auto RawCharPointerCB = [&CB](const char *&P) { - llvm::StringRef S(P); - CB(S); - assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated"); - P = S.data(); - }; - RawCharPointerCB(S.CanonicalDeclaration.FileURI); - RawCharPointerCB(S.Definition.FileURI); - - for (auto &Include : S.IncludeHeaders) - CB(Include.IncludeHeader); -} - -// Computes query-independent quality score for a Symbol. -// This currently falls in the range [1, ln(#indexed documents)]. -// FIXME: this should probably be split into symbol -> signals -// and signals -> score, so it can be reused for Sema completions. -float quality(const Symbol &S); - -// An immutable symbol container that stores a set of symbols. 
-// The container will maintain the lifetime of the symbols. -class SymbolSlab { -public: - using const_iterator = std::vector::const_iterator; - using iterator = const_iterator; - using value_type = Symbol; - - SymbolSlab() = default; - - const_iterator begin() const { return Symbols.begin(); } - const_iterator end() const { return Symbols.end(); } - const_iterator find(const SymbolID &SymID) const; - - size_t size() const { return Symbols.size(); } - bool empty() const { return Symbols.empty(); } - // Estimates the total memory usage. - size_t bytes() const { - return sizeof(*this) + Arena.getTotalMemory() + - Symbols.capacity() * sizeof(Symbol); - } - - // SymbolSlab::Builder is a mutable container that can 'freeze' to SymbolSlab. - // The frozen SymbolSlab will use less memory. - class Builder { - public: - Builder() : UniqueStrings(Arena) {} - - // Adds a symbol, overwriting any existing one with the same ID. - // This is a deep copy: underlying strings will be owned by the slab. - void insert(const Symbol &S); - - // Returns the symbol with an ID, if it exists. Valid until next insert(). - const Symbol *find(const SymbolID &ID) { - auto I = SymbolIndex.find(ID); - return I == SymbolIndex.end() ? nullptr : &Symbols[I->second]; - } - - // Consumes the builder to finalize the slab. - SymbolSlab build() &&; - - private: - llvm::BumpPtrAllocator Arena; - // Intern table for strings. Contents are on the arena. - llvm::UniqueStringSaver UniqueStrings; - std::vector Symbols; - // Values are indices into Symbols vector. - llvm::DenseMap SymbolIndex; - }; - -private: - SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector Symbols) - : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} - - llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not. - std::vector Symbols; // Sorted by SymbolID to allow lookup. -}; - // Describes the kind of a cross-reference. // // This is a bitfield which can be combined from different kinds. 
Index: clangd/index/Index.cpp =================================================================== --- clangd/index/Index.cpp +++ clangd/index/Index.cpp @@ -16,67 +16,6 @@ namespace clang { namespace clangd { -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag F) { - if (F == Symbol::None) - return OS << "None"; - std::string S; - if (F & Symbol::Deprecated) - S += "deprecated|"; - if (F & Symbol::IndexedForCodeCompletion) - S += "completion|"; - return OS << llvm::StringRef(S).rtrim('|'); -} - -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) { - return OS << S.Scope << S.Name; -} - -float quality(const Symbol &S) { - // This avoids a sharp gradient for tail symbols, and also neatly avoids the - // question of whether 0 references means a bad symbol or missing data. - if (S.References < 3) - return 1; - return std::log(S.References); -} - -SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const { - auto It = std::lower_bound( - Symbols.begin(), Symbols.end(), ID, - [](const Symbol &S, const SymbolID &I) { return S.ID < I; }); - if (It != Symbols.end() && It->ID == ID) - return It; - return Symbols.end(); -} - -// Copy the underlying data of the symbol into the owned arena. -static void own(Symbol &S, llvm::UniqueStringSaver &Strings) { - visitStrings(S, [&](llvm::StringRef &V) { V = Strings.save(V); }); -} - -void SymbolSlab::Builder::insert(const Symbol &S) { - auto R = SymbolIndex.try_emplace(S.ID, Symbols.size()); - if (R.second) { - Symbols.push_back(S); - own(Symbols.back(), UniqueStrings); - } else { - auto &Copy = Symbols[R.first->second] = S; - own(Copy, UniqueStrings); - } -} - -SymbolSlab SymbolSlab::Builder::build() && { - Symbols = {Symbols.begin(), Symbols.end()}; // Force shrink-to-fit. - // Sort symbols so the slab can binary search over them. - llvm::sort(Symbols, - [](const Symbol &L, const Symbol &R) { return L.ID < R.ID; }); - // We may have unused strings from overwritten symbols. 
Build a new arena. - llvm::BumpPtrAllocator NewArena; - llvm::UniqueStringSaver Strings(NewArena); - for (auto &S : Symbols) - own(S, Strings); - return SymbolSlab(std::move(NewArena), std::move(Symbols)); -} - llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, RefKind K) { if (K == RefKind::Unknown) return OS << "Unknown"; Index: clangd/index/Merge.cpp =================================================================== --- clangd/index/Merge.cpp +++ clangd/index/Merge.cpp @@ -9,6 +9,7 @@ #include "Merge.h" #include "Logger.h" #include "Trace.h" +#include "index/Symbol.h" #include "index/SymbolLocation.h" #include "index/SymbolOrigin.h" #include "llvm/ADT/STLExtras.h" Index: clangd/index/Serialization.h =================================================================== --- clangd/index/Serialization.h +++ clangd/index/Serialization.h @@ -26,6 +26,7 @@ #include "Headers.h" #include "Index.h" +#include "index/Symbol.h" #include "llvm/Support/Error.h" namespace clang { Index: clangd/index/Symbol.h =================================================================== --- clangd/index/Symbol.h +++ clangd/index/Symbol.h @@ -0,0 +1,231 @@ +//===--- Symbol.h ------------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H + +#include "SymbolID.h" +#include "SymbolLocation.h" +#include "SymbolOrigin.h" +#include "clang/Index/IndexSymbol.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/StringSaver.h" + +namespace clang { +namespace clangd { + +/// The class presents a C++ symbol, e.g. class, function. 
+///
+/// WARNING: Symbols do not own much of their underlying data - typically
+/// strings are owned by a SymbolSlab. They should be treated as non-owning
+/// references. Copies are shallow.
+///
+/// When adding new unowned data fields to Symbol, remember to update:
+///   - SymbolSlab::Builder in Symbol.cpp, to copy them to the slab's storage.
+///   - mergeSymbol in Merge.cpp, to properly combine two Symbols.
+///
+/// A fully documented symbol can be split as:
+/// size_type std::map<k, v>::count(const K& key) const
+/// | Return  |     Scope     |Name|    Signature     |
+/// We split up these components to allow display flexibility later.
+struct Symbol {
+  /// The ID of the symbol.
+  SymbolID ID;
+  /// The symbol information, like symbol kind.
+  index::SymbolInfo SymInfo;
+  /// The unqualified name of the symbol, e.g. "bar" (for ns::bar).
+  llvm::StringRef Name;
+  /// The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
+  llvm::StringRef Scope;
+  /// The location of the symbol's definition, if one was found.
+  /// This just covers the symbol name (e.g. without class/function body).
+  SymbolLocation Definition;
+  /// The location of the preferred declaration of the symbol.
+  /// This just covers the symbol name.
+  /// This may be the same as Definition.
+  ///
+  /// A C++ symbol may have multiple declarations, and we pick one to prefer.
+  ///   * For classes, the canonical declaration should be the definition.
+  ///   * For non-inline functions, the canonical declaration typically appears
+  ///     in the ".h" file corresponding to the definition.
+  SymbolLocation CanonicalDeclaration;
+  /// The number of translation units that reference this symbol from their main
+  /// file. This number is only meaningful if aggregated in an index.
+  unsigned References = 0;
+  /// Where this symbol came from. Usually an index provides a constant value.
+  SymbolOrigin Origin = SymbolOrigin::Unknown;
+  /// A brief description of the symbol that can be appended in the completion
+  /// candidate list. For example, "(X x, Y y) const" is a function signature.
+  /// Only set when the symbol is indexed for completion.
+  llvm::StringRef Signature;
+  /// What to insert when completing this symbol, after the symbol name.
+  /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function).
+  /// (When snippets are disabled, the symbol name alone is used).
+  /// Only set when the symbol is indexed for completion.
+  llvm::StringRef CompletionSnippetSuffix;
+  /// Documentation including comment for the symbol declaration.
+  llvm::StringRef Documentation;
+  /// Type when this symbol is used in an expression. (Short display form).
+  /// e.g. return type of a function, or type of a variable.
+  /// Only set when the symbol is indexed for completion.
+  llvm::StringRef ReturnType;
+
+  /// Raw representation of the OpaqueType of the symbol, used for scoring
+  /// purposes.
+  /// Only set when the symbol is indexed for completion.
+  llvm::StringRef Type;
+
+  struct IncludeHeaderWithReferences {
+    IncludeHeaderWithReferences() = default;
+
+    IncludeHeaderWithReferences(llvm::StringRef IncludeHeader,
+                                unsigned References)
+        : IncludeHeader(IncludeHeader), References(References) {}
+
+    /// This can be either a URI of the header to be #include'd
+    /// for this symbol, or a literal header quoted with <> or "" that is
+    /// suitable to be included directly. When it is a URI, the exact #include
+    /// path needs to be calculated according to the URI scheme.
+    ///
+    /// Note that the include header is a canonical include for the symbol and
+    /// can be different from FileURI in the CanonicalDeclaration.
+    llvm::StringRef IncludeHeader = "";
+    /// The number of translation units that reference this symbol and include
+    /// this header. This number is only meaningful if aggregated in an index.
+    unsigned References = 0;
+  };
+  /// One Symbol can potentially be included via different headers.
+  ///   - If we haven't seen a definition, this covers all declarations.
+  ///   - If we have seen a definition, this covers declarations visible from
+  ///     any definition.
+  /// Only set when the symbol is indexed for completion.
+  llvm::SmallVector<IncludeHeaderWithReferences, 1> IncludeHeaders;
+
+  enum SymbolFlag : uint8_t {
+    None = 0,
+    /// Whether or not this symbol is meant to be used for the code completion.
+    /// See also isIndexedForCodeCompletion().
+    /// Note that we don't store completion information (signature, snippet,
+    /// type, includes) if the symbol is not indexed for code completion.
+    IndexedForCodeCompletion = 1 << 0,
+    /// Indicates if the symbol is deprecated.
+    Deprecated = 1 << 1,
+    /// Symbol is an implementation detail.
+    ImplementationDetail = 1 << 2,
+    /// Symbol is visible to other files (not e.g. a static helper function).
+    VisibleOutsideFile = 1 << 3,
+  };
+
+  SymbolFlag Flags = SymbolFlag::None;
+  /// FIXME: also add deprecation message and fixit?
+};
+
+inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A,
+                                    Symbol::SymbolFlag B) {
+  return static_cast<Symbol::SymbolFlag>(static_cast<uint8_t>(A) |
+                                         static_cast<uint8_t>(B));
+}
+inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A,
+                                      Symbol::SymbolFlag B) {
+  return A = A | B;
+}
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag);
+
+/// Invokes Callback with each StringRef& contained in the Symbol.
+/// Useful for deduplicating backing strings.
+template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) {
+  CB(S.Name);
+  CB(S.Scope);
+  CB(S.Signature);
+  CB(S.CompletionSnippetSuffix);
+  CB(S.Documentation);
+  CB(S.ReturnType);
+  CB(S.Type);
+  auto RawCharPointerCB = [&CB](const char *&P) {
+    llvm::StringRef S(P);
+    CB(S);
+    assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated");
+    P = S.data();
+  };
+  RawCharPointerCB(S.CanonicalDeclaration.FileURI);
+  RawCharPointerCB(S.Definition.FileURI);
+
+  for (auto &Include : S.IncludeHeaders)
+    CB(Include.IncludeHeader);
+}
+
+/// Computes query-independent quality score for a Symbol.
+/// This currently falls in the range [1, ln(#indexed documents)].
+/// FIXME: this should probably be split into symbol -> signals
+/// and signals -> score, so it can be reused for Sema completions.
+float quality(const Symbol &S);
+
+/// An immutable symbol container that stores a set of symbols.
+/// The container will maintain the lifetime of the symbols.
+class SymbolSlab {
+public:
+  using const_iterator = std::vector<Symbol>::const_iterator;
+  using iterator = const_iterator;
+  using value_type = Symbol;
+
+  SymbolSlab() = default;
+
+  const_iterator begin() const { return Symbols.begin(); }
+  const_iterator end() const { return Symbols.end(); }
+  const_iterator find(const SymbolID &SymID) const;
+
+  size_t size() const { return Symbols.size(); }
+  bool empty() const { return Symbols.empty(); }
+  /// Estimates the total memory usage.
+  size_t bytes() const {
+    return sizeof(*this) + Arena.getTotalMemory() +
+           Symbols.capacity() * sizeof(Symbol);
+  }
+
+  /// SymbolSlab::Builder is a mutable container that can 'freeze' to
+  /// SymbolSlab. The frozen SymbolSlab will use less memory.
+  class Builder {
+  public:
+    Builder() : UniqueStrings(Arena) {}
+
+    /// Adds a symbol, overwriting any existing one with the same ID.
+    /// This is a deep copy: underlying strings will be owned by the slab.
+    void insert(const Symbol &S);
+
+    /// Returns the symbol with an ID, if it exists. Valid until next insert().
+    const Symbol *find(const SymbolID &ID) {
+      auto I = SymbolIndex.find(ID);
+      return I == SymbolIndex.end() ? nullptr : &Symbols[I->second];
+    }
+
+    /// Consumes the builder to finalize the slab.
+    SymbolSlab build() &&;
+
+  private:
+    llvm::BumpPtrAllocator Arena;
+    /// Intern table for strings. Contents are on the arena.
+    llvm::UniqueStringSaver UniqueStrings;
+    std::vector<Symbol> Symbols;
+    /// Values are indices into Symbols vector.
+    llvm::DenseMap<SymbolID, size_t> SymbolIndex;
+  };
+
+private:
+  SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols)
+      : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {}
+
+  llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not.
+  std::vector<Symbol> Symbols;  // Sorted by SymbolID to allow lookup.
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
Index: clangd/index/Symbol.cpp
===================================================================
--- clangd/index/Symbol.cpp
+++ clangd/index/Symbol.cpp
@@ -0,0 +1,88 @@
+//===--- Symbol.cpp ----------------------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Symbol.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cmath>
+#include <string>
+
+namespace clang {
+namespace clangd {
+
+// Prints the set flags as a '|'-separated list, or "None" when no flag is set.
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag F) {
+  if (F == Symbol::None)
+    return OS << "None";
+  std::string S;
+  if (F & Symbol::Deprecated)
+    S += "deprecated|";
+  if (F & Symbol::IndexedForCodeCompletion)
+    S += "completion|";
+  if (F & Symbol::ImplementationDetail)
+    S += "implementation-detail|";
+  if (F & Symbol::VisibleOutsideFile)
+    S += "visible|";
+  return OS << llvm::StringRef(S).rtrim('|');
+}
+
+// Prints the qualified name of the symbol (scope followed by name).
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
+  return OS << S.Scope << S.Name;
+}
+
+float quality(const Symbol &S) {
+  // This avoids a sharp gradient for tail symbols, and also neatly avoids the
+  // question of whether 0 references means a bad symbol or missing data.
+  if (S.References < 3)
+    return 1;
+  return std::log(S.References);
+}
+
+// Binary search; relies on Symbols being sorted by SymbolID (see build()).
+SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const {
+  auto It = std::lower_bound(
+      Symbols.begin(), Symbols.end(), ID,
+      [](const Symbol &S, const SymbolID &I) { return S.ID < I; });
+  if (It != Symbols.end() && It->ID == ID)
+    return It;
+  return Symbols.end();
+}
+
+// Copy the underlying data of the symbol into the owned arena.
+static void own(Symbol &S, llvm::UniqueStringSaver &Strings) {
+  visitStrings(S, [&](llvm::StringRef &V) { V = Strings.save(V); });
+}
+
+void SymbolSlab::Builder::insert(const Symbol &S) {
+  auto R = SymbolIndex.try_emplace(S.ID, Symbols.size());
+  if (R.second) {
+    Symbols.push_back(S);
+    own(Symbols.back(), UniqueStrings);
+  } else {
+    auto &Copy = Symbols[R.first->second] = S;
+    own(Copy, UniqueStrings);
+  }
+}
+
+SymbolSlab SymbolSlab::Builder::build() && {
+  Symbols = {Symbols.begin(), Symbols.end()}; // Force shrink-to-fit.
+  // Sort symbols so the slab can binary search over them.
+  llvm::sort(Symbols,
+             [](const Symbol &L, const Symbol &R) { return L.ID < R.ID; });
+  // We may have unused strings from overwritten symbols. Build a new arena.
+  llvm::BumpPtrAllocator NewArena;
+  llvm::UniqueStringSaver Strings(NewArena);
+  for (auto &S : Symbols)
+    own(S, Strings);
+  return SymbolSlab(std::move(NewArena), std::move(Symbols));
+}
+
+} // namespace clangd
+} // namespace clang