diff --git a/clang-tools-extra/clangd/support/Path.h b/clang-tools-extra/clangd/support/Path.h --- a/clang-tools-extra/clangd/support/Path.h +++ b/clang-tools-extra/clangd/support/Path.h @@ -9,7 +9,9 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_PATH_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_PATH_H +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include namespace clang { @@ -17,18 +19,67 @@ /// A typedef to represent a file path. Used solely for more descriptive /// signatures. -using Path = std::string; +// using Path = std::string; /// A typedef to represent a ref to file path. Used solely for more descriptive /// signatures. -using PathRef = llvm::StringRef; +// using PathRef = llvm::StringRef; // For platforms where paths are case-insensitive (but case-preserving), // we need to do case-insensitive comparisons and use lowercase keys. // FIXME: Make Path a real class with desired semantics instead. -std::string maybeCaseFoldPath(PathRef Path); -bool pathEqual(PathRef, PathRef); +std::string maybeCaseFoldPath(llvm::StringRef Path); +bool pathEqual(llvm::StringRef, llvm::StringRef); + +class IPath { + const char *Data; + unsigned Length; + unsigned Hash; + + IPath(unsigned Sentinel) : Data(nullptr), Length(0), Hash(Sentinel) {} + struct CanonicalTag {}; + IPath(llvm::StringRef Value, CanonicalTag); + +public: + IPath() : Data(0), Length(0), Hash(0) {} + explicit IPath(llvm::StringRef Value); + IPath &operator=(const IPath &) = default; + IPath &operator=(llvm::StringRef V) { return *this = IPath(V); } + + bool empty() const { return !Length; } + + operator llvm::StringRef() const { return llvm::StringRef(Data, Length); } + operator llvm::Twine() const { return llvm::Twine(llvm::StringRef(*this)); } + friend bool operator==(IPath L, IPath R) { + if (L.Data == R.Data) + return true; + if (L.Hash != R.Hash) + return false; + return pathEqual(L, R); + } + friend llvm::hash_code hash_value(IPath P) { + // Just mix the stored 32-bit hash, should be enough?! + return llvm::hash_value(P.Hash); + } + + friend struct llvm::DenseMapInfo; + friend class PathTable; +}; +inline bool operator!=(IPath L, IPath R) { return !(L == R); } + +using Path = IPath; +using PathRef = IPath; } // namespace clangd } // namespace clang +namespace llvm { +template <> struct llvm::DenseMapInfo { + using IPath = clang::clangd::IPath; + static inline IPath getEmptyKey() { return IPath(); } + static inline IPath getTombstoneKey() { return IPath(1); } + static unsigned getHashValue(IPath Val) { return Val.Hash; } + static bool isEqual(IPath L, clang::clangd::IPath R) { return L == R; } +}; +} // namespace llvm + #endif diff --git a/clang-tools-extra/clangd/support/Path.cpp b/clang-tools-extra/clangd/support/Path.cpp --- a/clang-tools-extra/clangd/support/Path.cpp +++ b/clang-tools-extra/clangd/support/Path.cpp @@ -7,10 +7,16 @@ //===----------------------------------------------------------------------===// #include "support/Path.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include +#include + namespace clang { namespace clangd { -std::string maybeCaseFoldPath(PathRef Path) { +std::string maybeCaseFoldPath(llvm::StringRef Path) { #if defined(_WIN32) || defined(__APPLE__) return Path.lower(); #else @@ -18,7 +24,7 @@ #endif } -bool pathEqual(PathRef A, PathRef B) { +bool pathEqual(llvm::StringRef A, llvm::StringRef B) { #if defined(_WIN32) || defined(__APPLE__) return A.equals_lower(B); #else @@ -26,5 +32,88 @@ #endif } +static unsigned pathHash(llvm::StringRef Path) { + llvm::SmallString<256> Upper = Path; + for (char &C : Upper) + C &= 0xdf; + return llvm::hash_value(Path); +} + +class PathTable { + struct Entry { + IPath P; + std::atomic Next; + llvm::hash_code Hash; + }; + const unsigned NBuckets; + std::atomic *Table; + std::atomic MemoryUsage; + + constexpr static std::memory_order Acquire = std::memory_order_acquire; + constexpr static std::memory_order Release = std::memory_order_release; + +public: + PathTable(unsigned NBuckets) + : NBuckets(NBuckets), Table(new std::atomic[NBuckets]) {} + + ~PathTable() { + assert(MemoryUsage == 0 && "Cannot destroy nonempty PathTable"); + delete[](Table); + } + + const IPath &put(llvm::StringRef Key) { + llvm::hash_code Hash = llvm::hash_value(Key); + std::atomic *Tail = &Table[Hash % NBuckets]; + // Scan through the bucket looking for a matching result. + for (;;) { + Entry *E = Tail->load(std::memory_order_acquire); + if (!E) + break; + if (E->Hash == Hash) { + // Don't bother comparing strings, we won't see >1M. + assert(Key == llvm::StringRef(E->P) && "Hash collision?"); + return E->P; + } + Tail = &E->Next; + } + // Not found. + // Tail points to the null "next" pointer at the end of the chain. + // Allocate the data, so we can attach it to the chain. + unsigned BufSize = sizeof(Entry) + Key.size(); + void *Buf = malloc(BufSize); + char *Data = reinterpret_cast(Buf) + sizeof(Entry); + memcpy(Data, Key.data(), Key.size()); + Entry *NewEntry = new (Buf) Entry{ + IPath(llvm::StringRef(Data, Key.size()), IPath::CanonicalTag{}), + {nullptr}, + Hash, + }; + // Now attempt to append it to the chain. + for (;;) { + Entry *NewTail = nullptr; + if (Tail->compare_exchange_weak(NewTail, NewEntry, + std::memory_order_acq_rel)) { + MemoryUsage += BufSize; + return NewEntry->P; // insert succeeded + } + // We lost a race against another insert... + if (NewTail->Hash == Hash) { // ...of the same value? + NewEntry->~Entry(); + free(Buf); + return NewTail->P; + } + // ... or of a different value, advance and retry. + Tail = &NewTail->Next; + } + } +}; +PathTable Table(1 << 20); + +IPath::IPath(llvm::StringRef Value) { + *this = Value.size() ? IPath() : Table.put(Value); +} +IPath::IPath(llvm::StringRef Value, CanonicalTag) + : Data(Value.data()), Length(Value.size()), Hash(pathHash(Value)) {} + } // namespace clangd } // namespace clang