diff --git a/clang-tools-extra/clangd/AST.h b/clang-tools-extra/clangd/AST.h --- a/clang-tools-extra/clangd/AST.h +++ b/clang-tools-extra/clangd/AST.h @@ -13,9 +13,10 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_AST_H_ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_AST_H_ -#include "index/Index.h" +#include "index/SymbolID.h" #include "clang/AST/Decl.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Lex/MacroInfo.h" namespace clang { class SourceManager; diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -62,6 +62,7 @@ index/IndexAction.cpp index/MemIndex.cpp index/Merge.cpp + index/Ref.cpp index/Serialization.cpp index/Symbol.cpp index/SymbolCollector.cpp diff --git a/clang-tools-extra/clangd/IncludeFixer.cpp b/clang-tools-extra/clangd/IncludeFixer.cpp --- a/clang-tools-extra/clangd/IncludeFixer.cpp +++ b/clang-tools-extra/clangd/IncludeFixer.cpp @@ -24,6 +24,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" +#include "clang/Lex/Lexer.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/Scope.h" diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp --- a/clang-tools-extra/clangd/Protocol.cpp +++ b/clang-tools-extra/clangd/Protocol.cpp @@ -13,7 +13,6 @@ #include "Protocol.h" #include "Logger.h" #include "URI.h" -#include "index/Index.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallString.h" diff --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp --- a/clang-tools-extra/clangd/Quality.cpp +++ b/clang-tools-extra/clangd/Quality.cpp @@ -5,11 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + #include "Quality.h" #include "AST.h" #include "FileDistance.h" #include "URI.h" -#include "index/Index.h" +#include "index/Symbol.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" diff --git a/clang-tools-extra/clangd/index/Index.h b/clang-tools-extra/clangd/index/Index.h --- a/clang-tools-extra/clangd/index/Index.h +++ b/clang-tools-extra/clangd/index/Index.h @@ -9,116 +9,19 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H -#include "ExpectedTypes.h" +#include "Ref.h" #include "Symbol.h" #include "SymbolID.h" -#include "SymbolLocation.h" -#include "clang/Index/IndexSymbol.h" -#include "clang/Lex/Lexer.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" #include "llvm/Support/JSON.h" -#include -#include #include #include -#include namespace clang { namespace clangd { -// Describes the kind of a cross-reference. -// -// This is a bitfield which can be combined from different kinds. -enum class RefKind : uint8_t { - Unknown = 0, - Declaration = static_cast(index::SymbolRole::Declaration), - Definition = static_cast(index::SymbolRole::Definition), - Reference = static_cast(index::SymbolRole::Reference), - All = Declaration | Definition | Reference, -}; -inline RefKind operator|(RefKind L, RefKind R) { - return static_cast(static_cast(L) | - static_cast(R)); -} -inline RefKind &operator|=(RefKind &L, RefKind R) { return L = L | R; } -inline RefKind operator&(RefKind A, RefKind B) { - return static_cast(static_cast(A) & - static_cast(B)); -} -llvm::raw_ostream &operator<<(llvm::raw_ostream &, RefKind); - -// Represents a symbol occurrence in the source file. -// Despite the name, it could be a declaration/definition/reference. -// -// WARNING: Location does not own the underlying data - Copies are shallow. -struct Ref { - // The source location where the symbol is named. - SymbolLocation Location; - RefKind Kind = RefKind::Unknown; -}; -inline bool operator<(const Ref &L, const Ref &R) { - return std::tie(L.Location, L.Kind) < std::tie(R.Location, R.Kind); -} -inline bool operator==(const Ref &L, const Ref &R) { - return std::tie(L.Location, L.Kind) == std::tie(R.Location, R.Kind); -} -llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Ref &); - -// An efficient structure of storing large set of symbol references in memory. -// Filenames are deduplicated. -class RefSlab { -public: - using value_type = std::pair>; - using const_iterator = std::vector::const_iterator; - using iterator = const_iterator; - - RefSlab() = default; - RefSlab(RefSlab &&Slab) = default; - RefSlab &operator=(RefSlab &&RHS) = default; - - const_iterator begin() const { return Refs.begin(); } - const_iterator end() const { return Refs.end(); } - /// Gets the number of symbols. - size_t size() const { return Refs.size(); } - size_t numRefs() const { return NumRefs; } - bool empty() const { return Refs.empty(); } - - size_t bytes() const { - return sizeof(*this) + Arena.getTotalMemory() + - sizeof(value_type) * Refs.size(); - } - - // RefSlab::Builder is a mutable container that can 'freeze' to RefSlab. - class Builder { - public: - Builder() : UniqueStrings(Arena) {} - // Adds a ref to the slab. Deep copy: Strings will be owned by the slab. - void insert(const SymbolID &ID, const Ref &S); - // Consumes the builder to finalize the slab. - RefSlab build() &&; - - private: - llvm::BumpPtrAllocator Arena; - llvm::UniqueStringSaver UniqueStrings; // Contents on the arena. - llvm::DenseMap> Refs; - }; - -private: - RefSlab(std::vector Refs, llvm::BumpPtrAllocator Arena, - size_t NumRefs) - : Arena(std::move(Arena)), Refs(std::move(Refs)), NumRefs(NumRefs) {} - - llvm::BumpPtrAllocator Arena; - std::vector Refs; - // Number of all references. - size_t NumRefs = 0; -}; - struct FuzzyFindRequest { /// \brief A query string for the fuzzy find. This is matched against symbols' /// un-qualified identifiers and should not contain qualifiers like "::". diff --git a/clang-tools-extra/clangd/index/Index.cpp b/clang-tools-extra/clangd/index/Index.cpp --- a/clang-tools-extra/clangd/index/Index.cpp +++ b/clang-tools-extra/clangd/index/Index.cpp @@ -12,57 +12,11 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" +#include namespace clang { namespace clangd { -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, RefKind K) { - if (K == RefKind::Unknown) - return OS << "Unknown"; - static const std::vector Messages = {"Decl", "Def", "Ref"}; - bool VisitedOnce = false; - for (unsigned I = 0; I < Messages.size(); ++I) { - if (static_cast(K) & 1u << I) { - if (VisitedOnce) - OS << ", "; - OS << Messages[I]; - VisitedOnce = true; - } - } - return OS; -} - -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Ref &R) { - return OS << R.Location << ":" << R.Kind; -} - -void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) { - auto &M = Refs[ID]; - M.push_back(S); - M.back().Location.FileURI = - UniqueStrings.save(M.back().Location.FileURI).data(); -} - -RefSlab RefSlab::Builder::build() && { - // We can reuse the arena, as it only has unique strings and we need them all. - // Reallocate refs on the arena to reduce waste and indirections when reading. - std::vector>> Result; - Result.reserve(Refs.size()); - size_t NumRefs = 0; - for (auto &Sym : Refs) { - auto &SymRefs = Sym.second; - llvm::sort(SymRefs); - // FIXME: do we really need to dedup? - SymRefs.erase(std::unique(SymRefs.begin(), SymRefs.end()), SymRefs.end()); - - NumRefs += SymRefs.size(); - auto *Array = Arena.Allocate(SymRefs.size()); - std::uninitialized_copy(SymRefs.begin(), SymRefs.end(), Array); - Result.emplace_back(Sym.first, llvm::ArrayRef(Array, SymRefs.size())); - } - return RefSlab(std::move(Result), std::move(Arena), NumRefs); -} - void SwapIndex::reset(std::unique_ptr Index) { // Keep the old index alive, so we don't destroy it under lock (may be slow). std::shared_ptr Pin; diff --git a/clang-tools-extra/clangd/index/Ref.h b/clang-tools-extra/clangd/index/Ref.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/index/Ref.h @@ -0,0 +1,119 @@ +//===--- Ref.h ---------------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_REF_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_REF_H + +#include "SymbolID.h" +#include "SymbolLocation.h" +#include "clang/Index/IndexSymbol.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace clang { +namespace clangd { + +/// Describes the kind of a cross-reference. +/// +/// This is a bitfield which can be combined from different kinds. +enum class RefKind : uint8_t { + Unknown = 0, + Declaration = static_cast(index::SymbolRole::Declaration), + Definition = static_cast(index::SymbolRole::Definition), + Reference = static_cast(index::SymbolRole::Reference), + All = Declaration | Definition | Reference, +}; + +inline RefKind operator|(RefKind L, RefKind R) { + return static_cast(static_cast(L) | + static_cast(R)); +} +inline RefKind &operator|=(RefKind &L, RefKind R) { return L = L | R; } +inline RefKind operator&(RefKind A, RefKind B) { + return static_cast(static_cast(A) & + static_cast(B)); +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &, RefKind); + +/// Represents a symbol occurrence in the source file. +/// Despite the name, it could be a declaration/definition/reference. +/// +/// WARNING: Location does not own the underlying data - Copies are shallow. +struct Ref { + /// The source location where the symbol is named. + SymbolLocation Location; + RefKind Kind = RefKind::Unknown; +}; + +inline bool operator<(const Ref &L, const Ref &R) { + return std::tie(L.Location, L.Kind) < std::tie(R.Location, R.Kind); +} +inline bool operator==(const Ref &L, const Ref &R) { + return std::tie(L.Location, L.Kind) == std::tie(R.Location, R.Kind); +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Ref &); + +/// An efficient structure of storing large set of symbol references in memory. +/// Filenames are deduplicated. +class RefSlab { +public: + using value_type = std::pair>; + using const_iterator = std::vector::const_iterator; + using iterator = const_iterator; + + RefSlab() = default; + RefSlab(RefSlab &&Slab) = default; + RefSlab &operator=(RefSlab &&RHS) = default; + + const_iterator begin() const { return Refs.begin(); } + const_iterator end() const { return Refs.end(); } + /// Gets the number of symbols. + size_t size() const { return Refs.size(); } + size_t numRefs() const { return NumRefs; } + bool empty() const { return Refs.empty(); } + + size_t bytes() const { + return sizeof(*this) + Arena.getTotalMemory() + + sizeof(value_type) * Refs.size(); + } + + /// RefSlab::Builder is a mutable container that can 'freeze' to RefSlab. + class Builder { + public: + Builder() : UniqueStrings(Arena) {} + /// Adds a ref to the slab. Deep copy: Strings will be owned by the slab. + void insert(const SymbolID &ID, const Ref &S); + /// Consumes the builder to finalize the slab. + RefSlab build() &&; + + private: + llvm::BumpPtrAllocator Arena; + llvm::UniqueStringSaver UniqueStrings; // Contents on the arena. + llvm::DenseMap> Refs; + }; + +private: + RefSlab(std::vector Refs, llvm::BumpPtrAllocator Arena, + size_t NumRefs) + : Arena(std::move(Arena)), Refs(std::move(Refs)), NumRefs(NumRefs) {} + + llvm::BumpPtrAllocator Arena; + std::vector Refs; + /// Number of all references. + size_t NumRefs = 0; +}; + +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_REF_H diff --git a/clang-tools-extra/clangd/index/Ref.cpp b/clang-tools-extra/clangd/index/Ref.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/index/Ref.cpp @@ -0,0 +1,62 @@ +//===--- Ref.cpp -------------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Ref.h" + +namespace clang { +namespace clangd { + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, RefKind K) { + if (K == RefKind::Unknown) + return OS << "Unknown"; + static const std::vector Messages = {"Decl", "Def", "Ref"}; + bool VisitedOnce = false; + for (unsigned I = 0; I < Messages.size(); ++I) { + if (static_cast(K) & 1u << I) { + if (VisitedOnce) + OS << ", "; + OS << Messages[I]; + VisitedOnce = true; + } + } + return OS; +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Ref &R) { + return OS << R.Location << ":" << R.Kind; +} + +void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) { + auto &M = Refs[ID]; + M.push_back(S); + M.back().Location.FileURI = + UniqueStrings.save(M.back().Location.FileURI).data(); +} + +RefSlab RefSlab::Builder::build() && { + // We can reuse the arena, as it only has unique strings and we need them all. + // Reallocate refs on the arena to reduce waste and indirections when reading. + std::vector>> Result; + Result.reserve(Refs.size()); + size_t NumRefs = 0; + for (auto &Sym : Refs) { + auto &SymRefs = Sym.second; + llvm::sort(SymRefs); + // FIXME: do we really need to dedup? + SymRefs.erase(std::unique(SymRefs.begin(), SymRefs.end()), SymRefs.end()); + + NumRefs += SymRefs.size(); + auto *Array = Arena.Allocate(SymRefs.size()); + std::uninitialized_copy(SymRefs.begin(), SymRefs.end(), Array); + Result.emplace_back(Sym.first, llvm::ArrayRef(Array, SymRefs.size())); + } + return RefSlab(std::move(Result), std::move(Arena), NumRefs); +} + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -11,6 +11,7 @@ #include "CanonicalIncludes.h" #include "CodeComplete.h" #include "CodeCompletionStrings.h" +#include "ExpectedTypes.h" #include "Logger.h" #include "SourceCode.h" #include "SymbolLocation.h" diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp --- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp +++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp @@ -10,10 +10,10 @@ // //===----------------------------------------------------------------------===// -#include "index/Index.h" #include "index/IndexAction.h" #include "index/Merge.h" #include "index/Serialization.h" +#include "index/Symbol.h" #include "index/SymbolCollector.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/Execution.h"