Index: clang/include/clang/IndexSerialization/PathIndexer.h
===================================================================
--- /dev/null
+++ clang/include/clang/IndexSerialization/PathIndexer.h
@@ -0,0 +1,97 @@
+//===--- PathIndexer.h -- Index of paths ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_INDEX_PATHINDEXER_H
+#define LLVM_CLANG_INDEX_PATHINDEXER_H
+
+#include "clang/Basic/FileManager.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+/// Identifies the root directory that a stored directory path is relative to.
+enum class RootDirKind {
+  /// No special root: the path is stored as given.
+  Regular = 0,
+  /// The path is stored relative to the current working directory.
+  CurrentWorkDir = 1,
+  /// The path is stored relative to the sysroot.
+  SysRoot = 2,
+};
+
+/// A piece of a path, described as a slice of a separately stored buffer.
+struct BitPathComponent {
+  /// Offset of the first character within the buffer.
+  const size_t Offset;
+  /// Number of characters in the slice.
+  const size_t Size;
+
+  BitPathComponent(size_t Offset, size_t Size) : Offset(Offset), Size(Size) {}
+};
+
+/// Represents a directory path backed by a buffer.
+struct DirBitPath {
+  /// Root directory that \c Dir is relative to.
+  const RootDirKind PrefixKind;
+  /// Directory path with the root denoted by \c PrefixKind stripped off.
+  const BitPathComponent Dir;
+
+  DirBitPath(RootDirKind Kind, const BitPathComponent &Dir)
+      : PrefixKind(Kind), Dir(Dir) {}
+};
+
+/// Represents a file path backed by a buffer: a directory plus a file name.
+struct FileBitPath : DirBitPath {
+  /// Last component of the path (the file name).
+  const BitPathComponent Filename;
+
+  FileBitPath(RootDirKind Kind, const BitPathComponent &Dir,
+              const BitPathComponent &Filename)
+      : DirBitPath(Kind, Dir), Filename(Filename) {}
+};
+
+/// Builds index of unique filesystem paths.
+class PathIndexer {
+  std::string WorkDir;
+  std::string SysrootPath;
+  llvm::SmallString<512> PathsBuf;
+  llvm::StringMap<DirBitPath> Dirs;
+  std::vector<FileBitPath> FileBitPaths;
+  llvm::DenseMap<const clang::FileEntry *, size_t> FileToIndex;
+
+public:
+  /// \p CWD is the working directory; a \p sysrootPath of "/" is ignored.
+  PathIndexer(llvm::StringRef CWD, llvm::StringRef sysrootPath);
+
+  /// \returns buffer with stored paths.
+  llvm::StringRef getPathsBuffer() const { return PathsBuf.str(); }
+
+  /// \returns array of offsets and sizes of stored paths.
+  llvm::ArrayRef<FileBitPath> getBitPaths() const { return FileBitPaths; }
+
+  /// Stores path to \p FE if it hasn't been stored yet.
+  /// \returns index into getBitPaths() or -1 if \p FE == nullptr.
+  int tryStoreFilePath(const clang::FileEntry *FE);
+
+  /// Stores \p Path if it is non-empty.
+  /// Warning: this method doesn't check for uniqueness.
+  /// \returns offset of \p Path in the paths buffer, or 0 if it is empty.
+  size_t storePath(llvm::StringRef Path);
+
+private:
+  /// Stores \p dirStr path if it hasn't been stored yet.
+  DirBitPath tryStoreDirPath(llvm::StringRef dirStr);
+};
+
+#endif // LLVM_CLANG_INDEX_PATHINDEXER_H
Index: clang/lib/CMakeLists.txt
===================================================================
--- clang/lib/CMakeLists.txt
+++ clang/lib/CMakeLists.txt
@@ -20,6 +20,7 @@
 add_subdirectory(Tooling)
 add_subdirectory(DirectoryWatcher)
 add_subdirectory(Index)
+add_subdirectory(IndexSerialization)
 if(CLANG_ENABLE_STATIC_ANALYZER)
   add_subdirectory(StaticAnalyzer)
 endif()
Index: clang/lib/IndexSerialization/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang/lib/IndexSerialization/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_clang_library(clangIndexSerialization
+  PathIndexer.cpp
+
+  LINK_LIBS
+  clangBasic
+  )
Index: clang/lib/IndexSerialization/PathIndexer.cpp
===================================================================
--- /dev/null
+++ clang/lib/IndexSerialization/PathIndexer.cpp
@@ -0,0 +1,70 @@
+//===--- PathIndexer.cpp -- Index
of paths ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/IndexSerialization/PathIndexer.h"
+
+using namespace llvm;
+using namespace clang;
+
+PathIndexer::PathIndexer(StringRef workDir, StringRef sysrootPath)
+    : WorkDir(workDir),
+      SysrootPath(sysrootPath == "/" ? StringRef() : sysrootPath) {}
+
+int PathIndexer::tryStoreFilePath(const FileEntry *FE) {
+  if (!FE)
+    return -1;
+  auto Pair = FileToIndex.insert(std::make_pair(FE, FileBitPaths.size()));
+  if (Pair.second) {
+    // First time FE is seen: store its directory, then its file name.
+    StringRef Filename = sys::path::filename(FE->getName());
+    DirBitPath Dir = tryStoreDirPath(sys::path::parent_path(FE->getName()));
+    FileBitPaths.emplace_back(
+        Dir.PrefixKind, Dir.Dir,
+        BitPathComponent(storePath(Filename), Filename.size()));
+  }
+  return Pair.first->getSecond();
+}
+
+size_t PathIndexer::storePath(StringRef Path) {
+  if (Path.empty())
+    return 0;
+  size_t offset = PathsBuf.size();
+  PathsBuf += Path;
+  return offset;
+}
+
+static bool isPathInDir(StringRef dir, StringRef path) {
+  if (dir.empty() || !path.startswith(dir))
+    return false;
+  StringRef rest = path.drop_front(dir.size());
+  return !rest.empty() && sys::path::is_separator(rest.front());
+}
+
+DirBitPath PathIndexer::tryStoreDirPath(StringRef dirStr) {
+  auto dirIt = Dirs.find(dirStr);
+  if (dirIt != Dirs.end())
+    return dirIt->second;
+
+  // dirStr is trimmed below; cache under the untrimmed path find() receives.
+  const StringRef Key = dirStr;
+  RootDirKind Kind = RootDirKind::Regular;
+  if (isPathInDir(SysrootPath, dirStr)) {
+    Kind = RootDirKind::SysRoot;
+    dirStr = dirStr.drop_front(SysrootPath.size());
+  } else if (isPathInDir(WorkDir, dirStr)) {
+    Kind = RootDirKind::CurrentWorkDir;
+    dirStr = dirStr.drop_front(WorkDir.size());
+  }
+  if (Kind != RootDirKind::Regular)
+    while (!dirStr.empty() && dirStr[0] == '/')
+      dirStr = dirStr.drop_front();
+
+  DirBitPath Result(Kind, BitPathComponent(storePath(dirStr), dirStr.size()));
+  Dirs.try_emplace(Key, Result);
+  return Result;
+}