Index: clang-tools-extra/trunk/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/trunk/clangd/CMakeLists.txt +++ clang-tools-extra/trunk/clangd/CMakeLists.txt @@ -46,6 +46,7 @@ index/IndexAction.cpp index/MemIndex.cpp index/Merge.cpp + index/SymbolID.cpp index/Serialization.cpp index/SymbolCollector.cpp index/YAMLSerialization.cpp Index: clang-tools-extra/trunk/clangd/Protocol.h =================================================================== --- clang-tools-extra/trunk/clangd/Protocol.h +++ clang-tools-extra/trunk/clangd/Protocol.h @@ -25,6 +25,7 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PROTOCOL_H #include "URI.h" +#include "index/SymbolID.h" #include "llvm/ADT/Optional.h" #include "llvm/Support/JSON.h" #include Index: clang-tools-extra/trunk/clangd/index/Index.h =================================================================== --- clang-tools-extra/trunk/clangd/index/Index.h +++ clang-tools-extra/trunk/clangd/index/Index.h @@ -11,11 +11,11 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H #include "ExpectedTypes.h" +#include "SymbolID.h" #include "clang/Index/IndexSymbol.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Hashing.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -95,53 +95,6 @@ } llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &); -// The class identifies a particular C++ symbol (class, function, method, etc). -// -// As USRs (Unified Symbol Resolution) could be large, especially for functions -// with long type arguments, SymbolID is using truncated SHA1(USR) values to -// guarantee the uniqueness of symbols while using a relatively small amount of -// memory (vs storing USRs directly). -// -// SymbolID can be used as key in the symbol indexes to lookup the symbol. -class SymbolID { -public: - SymbolID() = default; - explicit SymbolID(llvm::StringRef USR); - - bool operator==(const SymbolID &Sym) const { - return HashValue == Sym.HashValue; - } - bool operator<(const SymbolID &Sym) const { - return HashValue < Sym.HashValue; - } - - // The stored hash is truncated to RawSize bytes. - // This trades off memory against the number of symbols we can handle. - constexpr static size_t RawSize = 8; - llvm::StringRef raw() const { - return StringRef(reinterpret_cast(HashValue.data()), RawSize); - } - static SymbolID fromRaw(llvm::StringRef); - - // Returns a hex encoded string. - std::string str() const; - static llvm::Expected fromStr(llvm::StringRef); - -private: - std::array HashValue; -}; - -inline llvm::hash_code hash_value(const SymbolID &ID) { - // We already have a good hash, just return the first bytes. - assert(sizeof(size_t) <= SymbolID::RawSize && "size_t longer than SHA1!"); - size_t Result; - memcpy(&Result, ID.raw().data(), sizeof(size_t)); - return llvm::hash_code(Result); -} - -// Write SymbolID into the given stream. SymbolID is encoded as ID.str(). -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID); - } // namespace clangd } // namespace clang namespace llvm { Index: clang-tools-extra/trunk/clangd/index/Index.cpp =================================================================== --- clang-tools-extra/trunk/clangd/index/Index.cpp +++ clang-tools-extra/trunk/clangd/index/Index.cpp @@ -12,7 +12,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" -#include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -43,34 +42,6 @@ << "-" << L.End.line() << ":" << L.End.column() << ")"; } -SymbolID::SymbolID(StringRef USR) { - auto Hash = SHA1::hash(arrayRefFromStringRef(USR)); - static_assert(sizeof(Hash) >= RawSize, "RawSize larger than SHA1"); - memcpy(HashValue.data(), Hash.data(), RawSize); -} - -raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) { - return OS << toHex(ID.raw()); -} - -SymbolID SymbolID::fromRaw(StringRef Raw) { - SymbolID ID; - assert(Raw.size() == RawSize); - memcpy(ID.HashValue.data(), Raw.data(), RawSize); - return ID; -} - -std::string SymbolID::str() const { return toHex(raw()); } - -Expected SymbolID::fromStr(StringRef Str) { - if (Str.size() != RawSize * 2) - return createStringError(inconvertibleErrorCode(), "Bad ID length"); - for (char C : Str) - if (!isHexDigit(C)) - return createStringError(inconvertibleErrorCode(), "Bad hex ID"); - return fromRaw(fromHex(Str)); -} - raw_ostream &operator<<(raw_ostream &OS, SymbolOrigin O) { if (O == SymbolOrigin::Unknown) return OS << "unknown"; Index: clang-tools-extra/trunk/clangd/index/SymbolID.h =================================================================== --- clang-tools-extra/trunk/clangd/index/SymbolID.h +++ clang-tools-extra/trunk/clangd/index/SymbolID.h @@ -0,0 +1,65 @@ +//===--- SymbolID.h ----------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLID_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLID_H + +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace clang { +namespace clangd { + +// The class identifies a particular C++ symbol (class, function, method, etc). +// +// As USRs (Unified Symbol Resolution) could be large, especially for functions +// with long type arguments, SymbolID is using truncated SHA1(USR) values to +// guarantee the uniqueness of symbols while using a relatively small amount of +// memory (vs storing USRs directly). +// +// SymbolID can be used as key in the symbol indexes to lookup the symbol. +class SymbolID { +public: + SymbolID() = default; + explicit SymbolID(llvm::StringRef USR); + + bool operator==(const SymbolID &Sym) const { + return HashValue == Sym.HashValue; + } + bool operator<(const SymbolID &Sym) const { + return HashValue < Sym.HashValue; + } + + // The stored hash is truncated to RawSize bytes. + // This trades off memory against the number of symbols we can handle. + constexpr static size_t RawSize = 8; + llvm::StringRef raw() const; + static SymbolID fromRaw(llvm::StringRef); + + // Returns a hex encoded string. + std::string str() const; + static llvm::Expected fromStr(llvm::StringRef); + +private: + std::array HashValue; +}; + +llvm::hash_code hash_value(const SymbolID &ID); + +// Write SymbolID into the given stream. SymbolID is encoded as ID.str(). +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID); + +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLID_H Index: clang-tools-extra/trunk/clangd/index/SymbolID.cpp =================================================================== --- clang-tools-extra/trunk/clangd/index/SymbolID.cpp +++ clang-tools-extra/trunk/clangd/index/SymbolID.cpp @@ -0,0 +1,58 @@ +//===--- SymbolID.cpp --------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SymbolID.h" +#include "llvm/Support/SHA1.h" + +using namespace llvm; +namespace clang { +namespace clangd { + +SymbolID::SymbolID(StringRef USR) { + auto Hash = llvm::SHA1::hash(arrayRefFromStringRef(USR)); + static_assert(sizeof(Hash) >= RawSize, "RawSize larger than SHA1"); + memcpy(HashValue.data(), Hash.data(), RawSize); +} + +llvm::StringRef SymbolID::raw() const { + return StringRef(reinterpret_cast(HashValue.data()), RawSize); +} + +SymbolID SymbolID::fromRaw(StringRef Raw) { + SymbolID ID; + assert(Raw.size() == RawSize); + memcpy(ID.HashValue.data(), Raw.data(), RawSize); + return ID; +} + +std::string SymbolID::str() const { return toHex(raw()); } + +Expected SymbolID::fromStr(StringRef Str) { + if (Str.size() != RawSize * 2) + return createStringError(inconvertibleErrorCode(), "Bad ID length"); + for (char C : Str) + if (!isHexDigit(C)) + return createStringError(inconvertibleErrorCode(), "Bad hex ID"); + return fromRaw(fromHex(Str)); +} + +raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) { + return OS << toHex(ID.raw()); +} + +llvm::hash_code hash_value(const SymbolID &ID) { + // We already have a good hash, just return the first bytes. + assert(sizeof(size_t) <= SymbolID::RawSize && "size_t longer than SHA1!"); + size_t Result; + memcpy(&Result, ID.raw().data(), sizeof(size_t)); + return llvm::hash_code(Result); +} + +} // namespace clangd +} // namespace clang