Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -18,6 +18,8 @@ Protocol.cpp ProtocolHandlers.cpp Trace.cpp + index/Index.cpp + index/SymbolCollector.cpp LINK_LIBS clangAST Index: clangd/index/Index.h =================================================================== --- /dev/null +++ clangd/index/Index.h @@ -0,0 +1,136 @@ +//===--- Index.h ------------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H + +#include "clang/Index/IndexSymbol.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringExtras.h" + +#include +#include + +namespace clang { +namespace clangd { + +struct SymbolLocation { + // The absolute path of the source file where a symbol occurs. + std::string FilePath; + // The 0-based offset to the first character of the symbol from the beginning + // of the source file. + unsigned StartOffset; + // The 0-based offset to the last character of the symbol from the beginning + // of the source file. + unsigned EndOffset; +}; + +// The class identifies a particular C++ symbol (class, function, method, etc). +// +// As USRs (Unified Symbol Resolution) could be large, especially for functions +// with long type arguments, SymbolID uses 160-bit SHA1(USR) values to +// guarantee the uniqueness of symbols while using a relatively small amount of +// memory (vs storing USRs directly). +// +// SymbolID can be used as a key in the symbol indexes to look up the symbol.
+class SymbolID { +public: + SymbolID() = default; + SymbolID(llvm::StringRef USR); + + bool operator==(const SymbolID& Sym) const { + return HashValue == Sym.HashValue; + } + +private: + friend class llvm::DenseMapInfo; + + std::array HashValue; +}; + +// The class represents a C++ symbol, e.g. class, function. +// +// FIXME: instead of each symbol owning a copy of its fields, we can share +// storage from SymbolSlab. +struct Symbol { + // The ID of the symbol. + SymbolID ID; + // The qualified name of the symbol, e.g. Foo::bar. + std::string QualifiedName; + // The symbol information, like symbol kind. + index::SymbolInfo SymInfo; + // The location of the canonical declaration of the symbol. + // + // A C++ symbol could have multiple declarations and one definition (e.g. + // a function is declared in ".h" file, and is defined in ".cc" file). + // * For classes, the canonical declaration is usually the definition. + // * For non-inline functions, the canonical declaration is a declaration + // (not a definition), which is usually declared in ".h" file. + SymbolLocation CanonicalDeclaration; + + // FIXME: add definition location of the symbol. + // FIXME: add all occurrences support. + // FIXME: add extra fields for index scoring signals. + // FIXME: add code completion information. +}; + +// A symbol container that stores a set of symbols. The container will maintain +// the lifetime of the symbols. +// +// FIXME: Use a space-efficient implementation, a lot of Symbol fields could +// share the same storage. +class SymbolSlab { + public: + using const_iterator = llvm::DenseMap::const_iterator; + + SymbolSlab() = default; + + const_iterator begin() const; + const_iterator end() const; + const_iterator find(const SymbolID& SymID) const; + + // Once called, no more symbols can be added to the SymbolSlab. This + // operation is irreversible.
+ void freeze(); + + void insert(Symbol S); + + private: + bool Frozen = false; + + llvm::DenseMap Symbols; +}; + +} // namespace clangd +} // namespace clang + +namespace llvm { + +template <> struct DenseMapInfo { + static inline clang::clangd::SymbolID getEmptyKey() { + static clang::clangd::SymbolID EmptyKey("EMPTYKEY"); + return EmptyKey; + } + static inline clang::clangd::SymbolID getTombstoneKey() { + static clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY"); + return TombstoneKey; + } + static unsigned getHashValue(const clang::clangd::SymbolID &Sym) { + return hash_value( + ArrayRef(Sym.HashValue.data(), Sym.HashValue.size())); + } + static bool isEqual(const clang::clangd::SymbolID &LHS, + const clang::clangd::SymbolID &RHS) { + return LHS == RHS; + } +}; + +} // namespace llvm + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H Index: clangd/index/Index.cpp =================================================================== --- /dev/null +++ clangd/index/Index.cpp @@ -0,0 +1,49 @@ +//===--- Index.cpp -----------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "Index.h" + +#include "llvm/Support/SHA1.h" + +namespace clang { +namespace clangd { + +namespace { +ArrayRef toArrayRef(StringRef S) { + return {reinterpret_cast(S.data()), S.size()}; +} +} // namespace + +SymbolID::SymbolID(llvm::StringRef USR) + : HashValue(llvm::SHA1::hash(toArrayRef(USR))) {} + +SymbolSlab::const_iterator SymbolSlab::begin() const { + return Symbols.begin(); +} + +SymbolSlab::const_iterator SymbolSlab::end() const { + return Symbols.end(); +} + +SymbolSlab::const_iterator SymbolSlab::find(const SymbolID& SymID) const { + return Symbols.find(SymID); +} + +void SymbolSlab::freeze() { + Frozen = true; +} + +void SymbolSlab::insert(Symbol S) { + assert(!Frozen && + "Can't insert a symbol after the slab has been frozen!"); + Symbols[S.ID] = std::move(S); +} + +} // namespace clangd +} // namespace clang Index: clangd/index/SymbolCollector.h =================================================================== --- /dev/null +++ clangd/index/SymbolCollector.h @@ -0,0 +1,49 @@ +//===--- SymbolCollector.h ---------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H + +#include "Index.h" + +#include "clang/Index/IndexDataConsumer.h" +#include "clang/Index/IndexSymbol.h" + +namespace clang { +namespace clangd { + +// Collect all symbols from an AST. +// +// Clients (e.g. clangd) can use SymbolCollector together with +// index::indexTopLevelDecls to retrieve all symbols when the source file is +// changed.
+class SymbolCollector : public index::IndexDataConsumer { +public: + SymbolCollector() = default; + + bool + handleDeclOccurence(const Decl *D, index::SymbolRoleSet Roles, + ArrayRef Relations, FileID FID, + unsigned Offset, + index::IndexDataConsumer::ASTNodeInfo ASTNode) override; + + void finish() override; + + // Moves the collected symbols out of the collector and returns them. + SymbolSlab takeSymbols() { return std::move(Symbols); } + +private: + // All Symbols collected from the AST. + SymbolSlab Symbols; +}; + +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H Index: clangd/index/SymbolCollector.cpp =================================================================== --- /dev/null +++ clangd/index/SymbolCollector.cpp @@ -0,0 +1,102 @@ +//===--- SymbolCollector.cpp -------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SymbolCollector.h" + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Index/IndexSymbol.h" +#include "clang/Index/USRGeneration.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" + +namespace clang { +namespace clangd { + +namespace { +// Make the Path absolute using the current working directory of the given +// SourceManager if the Path is not an absolute path. +// +// The Path can be a path relative to the build directory, or retrieved from +// the SourceManager.
+std::string makeAbsolutePath(const SourceManager &SM, StringRef Path) { + llvm::SmallString<128> AbsolutePath(Path); + if (std::error_code EC = + SM.getFileManager().getVirtualFileSystem()->makeAbsolute( + AbsolutePath)) + llvm::errs() << "Warning: could not make absolute file: '" << Path + << "': " << EC.message() << '\n'; + // Handle the symbolic link path case where the current working directory + // (getCurrentWorkingDirectory) is a symlink. We always want the real + // file path (instead of the symlink path) for the C++ symbols. + // + // Consider the following example: + // + // src dir: /project/src/foo.h + // current working directory (symlink): /tmp/build -> /project/src/ + // + // The file path of Symbol is "/project/src/foo.h" instead of + // "/tmp/build/foo.h" + const DirectoryEntry *Dir = SM.getFileManager().getDirectory( + llvm::sys::path::parent_path(AbsolutePath.str())); + if (Dir) { + StringRef DirName = SM.getFileManager().getCanonicalName(Dir); + SmallVector AbsoluteFilename; + llvm::sys::path::append(AbsoluteFilename, DirName, + llvm::sys::path::filename(AbsolutePath.str())); + return llvm::StringRef(AbsoluteFilename.data(), AbsoluteFilename.size()) + .str(); + } + return AbsolutePath.str(); +} +} // namespace + +// Always return true to continue indexing. +bool SymbolCollector::handleDeclOccurence( + const Decl *D, index::SymbolRoleSet Roles, + ArrayRef Relations, FileID FID, unsigned Offset, + index::IndexDataConsumer::ASTNodeInfo ASTNode) { + // FIXME: collect all symbol references. + if (!(Roles & static_cast(index::SymbolRole::Declaration) || + Roles & static_cast(index::SymbolRole::Definition))) + return true; + + if (const NamedDecl *ND = llvm::dyn_cast(D)) { + // FIXME: Should we include the internal linkage symbols?
+ if (!ND->hasExternalFormalLinkage() || ND->isInAnonymousNamespace()) + return true; + + llvm::SmallVector Buff; + if (index::generateUSRForDecl(ND, Buff)) + return true; + + std::string USR(Buff.data(), Buff.size()); + auto ID = SymbolID(USR); + if (Symbols.find(ID) != Symbols.end()) + return true; + + auto &SM = ND->getASTContext().getSourceManager(); + SymbolLocation Location = { + makeAbsolutePath(SM, SM.getFilename(D->getLocation())), + SM.getFileOffset(D->getLocStart()), SM.getFileOffset(D->getLocEnd())}; + Symbols.insert({std::move(ID), ND->getQualifiedNameAsString(), + index::getSymbolInfo(D), std::move(Location)}); + } + + return true; +} + +void SymbolCollector::finish() { + Symbols.freeze(); +} + +} // namespace clangd +} // namespace clang Index: unittests/clangd/CMakeLists.txt =================================================================== --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -15,6 +15,7 @@ JSONExprTests.cpp TestFS.cpp TraceTests.cpp + SymbolCollectorTests.cpp ) target_link_libraries(ClangdTests Index: unittests/clangd/SymbolCollectorTests.cpp =================================================================== --- /dev/null +++ unittests/clangd/SymbolCollectorTests.cpp @@ -0,0 +1,110 @@ +//===-- SymbolCollectorTests.cpp -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "index/SymbolCollector.h" +#include "clang/Index/IndexingAction.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/FileSystemOptions.h" +#include "clang/Basic/VirtualFileSystem.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "gtest/gtest.h" +#include "gmock/gmock.h" + +#include +#include + +using testing::UnorderedElementsAre; +using testing::Eq; +using testing::Field; + +// GMock helpers for matching Symbol. +MATCHER_P(QName, Name, "") { return arg.second.QualifiedName == Name; } + +namespace clang { +namespace clangd { + +namespace { +class SymbolIndexActionFactory : public tooling::FrontendActionFactory { + public: + SymbolIndexActionFactory() = default; + + clang::FrontendAction *create() override { + index::IndexingOptions IndexOpts; + IndexOpts.SystemSymbolFilter = + index::IndexingOptions::SystemSymbolFilterKind::All; + IndexOpts.IndexFunctionLocals = false; + Collector = std::make_shared(); + FrontendAction *Action = + index::createIndexingAction(Collector, IndexOpts, nullptr).release(); + return Action; + } + + std::shared_ptr Collector; +}; + +class SymbolCollectorTest : public ::testing::Test { +public: + bool runSymbolCollector(StringRef HeaderCode, StringRef MainCode) { + llvm::IntrusiveRefCntPtr InMemoryFileSystem( + new vfs::InMemoryFileSystem); + llvm::IntrusiveRefCntPtr Files( + new FileManager(FileSystemOptions(), InMemoryFileSystem)); + + const std::string FileName = "symbol.cc"; + const std::string HeaderName = "symbols.h"; + auto Factory = llvm::make_unique(); + + tooling::ToolInvocation Invocation( + {"symbol_collector", "-fsyntax-only", "-std=c++11", FileName}, + Factory->create(), Files.get(), + std::make_shared()); + 
InMemoryFileSystem->addFile(HeaderName, 0, + llvm::MemoryBuffer::getMemBuffer(HeaderCode)); + + std::string Content = "#include\"" + std::string(HeaderName) + "\""; + Content += "\n" + MainCode.str(); + InMemoryFileSystem->addFile(FileName, 0, + llvm::MemoryBuffer::getMemBuffer(Content)); + const bool Succeeded = Invocation.run(); + Symbols = Factory->Collector->takeSymbols(); + return Succeeded; + } + +protected: + SymbolSlab Symbols; +}; + +TEST_F(SymbolCollectorTest, CollectSymbol) { + const std::string Header = R"( + class Foo { + void f(); + }; + void f1(); + inline void f2() {} + )"; + const std::string Main = R"( + namespace { + void ff() {} // ignore + } + void f1() {} + )"; + runSymbolCollector(Header, Main); + EXPECT_THAT(Symbols, UnorderedElementsAre(QName("Foo"), QName("Foo::f"), + QName("f1"), QName("f2"))); +} + +} // namespace +} // namespace clangd +} // namespace clang