Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -19,6 +19,7 @@ Diagnostics.cpp DraftStore.cpp FindSymbols.cpp + FileDistance.cpp FuzzyMatch.cpp GlobalCompilationDatabase.cpp Headers.cpp Index: clangd/ClangdServer.cpp =================================================================== --- clangd/ClangdServer.cpp +++ clangd/ClangdServer.cpp @@ -162,7 +162,7 @@ // both the old and the new version in case only one of them matches. CompletionList Result = clangd::codeComplete( File, IP->Command, PreambleData ? &PreambleData->Preamble : nullptr, - PreambleData ? PreambleData->Inclusions : std::vector(), + PreambleData ? PreambleData->Includes : IncludeStructure(), IP->Contents, Pos, FS, PCHs, CodeCompleteOpts); CB(std::move(Result)); }; Index: clangd/ClangdUnit.h =================================================================== --- clangd/ClangdUnit.h +++ clangd/ClangdUnit.h @@ -45,14 +45,14 @@ // Stores Preamble and associated data. struct PreambleData { PreambleData(PrecompiledPreamble Preamble, std::vector Diags, - std::vector Inclusions); + IncludeStructure Includes); tooling::CompileCommand CompileCommand; PrecompiledPreamble Preamble; std::vector Diags; // Processes like code completions and go-to-definitions will need #include // information, and their compile action skips preamble range. - std::vector Inclusions; + IncludeStructure Includes; }; /// Information required to run clang, e.g. to parse AST or do code completion. @@ -99,14 +99,14 @@ /// Returns the esitmated size of the AST and the accessory structures, in /// bytes. Does not include the size of the preamble. std::size_t getUsedBytes() const; - const std::vector &getInclusions() const; + const IncludeStructure &getIncludeStructure() const; private: ParsedAST(std::shared_ptr Preamble, std::unique_ptr Clang, std::unique_ptr Action, std::vector LocalTopLevelDecls, std::vector Diags, - std::vector Inclusions); + IncludeStructure Includes); // In-memory preambles must outlive the AST, it is important that this member // goes before Clang and Action. @@ -124,7 +124,7 @@ // Top-level decls inside the current file. Not that this does not include // top-level decls from the preamble. std::vector LocalTopLevelDecls; - std::vector Inclusions; + IncludeStructure Includes; }; using PreambleParsedCallback = std::function takeInclusions() { return std::move(Inclusions); } + IncludeStructure takeIncludes() { return std::move(Includes); } void AfterExecute(CompilerInstance &CI) override { if (!ParsedCallback) @@ -103,15 +103,13 @@ std::unique_ptr createPPCallbacks() override { assert(SourceMgr && "SourceMgr must be set at this point"); - return collectInclusionsInMainFileCallback( - *SourceMgr, - [this](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); }); + return collectIncludeStructureCallback(*SourceMgr, &Includes); } private: PathRef File; PreambleParsedCallback ParsedCallback; - std::vector Inclusions; + IncludeStructure Includes; SourceManager *SourceMgr = nullptr; }; @@ -153,15 +151,11 @@ return llvm::None; } - std::vector Inclusions; // Copy over the includes from the preamble, then combine with the // non-preamble includes below. - if (Preamble) - Inclusions = Preamble->Inclusions; - - Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback( - Clang->getSourceManager(), - [&Inclusions](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); })); + auto Includes = Preamble ? Preamble->Includes : IncludeStructure{}; + Clang->getPreprocessor().addPPCallbacks( + collectIncludeStructureCallback(Clang->getSourceManager(), &Includes)); if (!Action->Execute()) log("Execute() failed when building AST for " + MainInput.getFile()); @@ -179,7 +173,7 @@ Diags.insert(Diags.begin(), Preamble->Diags.begin(), Preamble->Diags.end()); return ParsedAST(std::move(Preamble), std::move(Clang), std::move(Action), std::move(ParsedDecls), std::move(Diags), - std::move(Inclusions)); + std::move(Includes)); } ParsedAST::ParsedAST(ParsedAST &&Other) = default; @@ -246,25 +240,24 @@ return Total; } -const std::vector &ParsedAST::getInclusions() const { - return Inclusions; +const IncludeStructure &ParsedAST::getIncludeStructure() const { + return Includes; } PreambleData::PreambleData(PrecompiledPreamble Preamble, - std::vector Diags, - std::vector Inclusions) + std::vector Diags, IncludeStructure Includes) : Preamble(std::move(Preamble)), Diags(std::move(Diags)), - Inclusions(std::move(Inclusions)) {} + Includes(std::move(Includes)) {} ParsedAST::ParsedAST(std::shared_ptr Preamble, std::unique_ptr Clang, std::unique_ptr Action, std::vector LocalTopLevelDecls, - std::vector Diags, std::vector Inclusions) + std::vector Diags, IncludeStructure Includes) : Preamble(std::move(Preamble)), Clang(std::move(Clang)), Action(std::move(Action)), Diags(std::move(Diags)), LocalTopLevelDecls(std::move(LocalTopLevelDecls)), - Inclusions(std::move(Inclusions)) { + Includes(std::move(Includes)) { assert(this->Clang); assert(this->Action); } @@ -350,7 +343,7 @@ " for file " + Twine(FileName)); return std::make_shared( std::move(*BuiltPreamble), PreambleDiagnostics.take(), - SerializedDeclsCollector.takeInclusions()); + SerializedDeclsCollector.takeIncludes()); } else { log("Could not build a preamble for file " + Twine(FileName)); return nullptr; Index: clangd/CodeComplete.h =================================================================== --- clangd/CodeComplete.h +++ clangd/CodeComplete.h @@ -79,7 +79,7 @@ CompletionList codeComplete(PathRef FileName, const tooling::CompileCommand &Command, PrecompiledPreamble const *Preamble, - const std::vector &PreambleInclusions, + const IncludeStructure &PreambleInclusions, StringRef Contents, Position Pos, IntrusiveRefCntPtr VFS, std::shared_ptr PCHs, Index: clangd/CodeComplete.cpp =================================================================== --- clangd/CodeComplete.cpp +++ clangd/CodeComplete.cpp @@ -21,6 +21,7 @@ #include "CodeComplete.h" #include "CodeCompletionStrings.h" #include "Compiler.h" +#include "FileDistance.h" #include "FuzzyMatch.h" #include "Headers.h" #include "Logger.h" @@ -732,7 +733,6 @@ PathRef FileName; const tooling::CompileCommand &Command; PrecompiledPreamble const *Preamble; - const std::vector &PreambleInclusions; StringRef Contents; Position Pos; IntrusiveRefCntPtr VFS; @@ -740,12 +740,11 @@ }; // Invokes Sema code completion on a file. -// If \p Includes is set, it will be initialized after a compiler instance has -// been set up. +// If \p Includes is set, it will be updated based on the compiler invocation. bool semaCodeComplete(std::unique_ptr Consumer, const clang::CodeCompleteOptions &Options, const SemaCompleteInput &Input, - std::unique_ptr *Includes = nullptr) { + IncludeStructure *Includes = nullptr) { trace::Span Tracer("Sema completion"); std::vector ArgStrs; for (const auto &S : Input.Command.CommandLine) @@ -806,28 +805,9 @@ Input.FileName); return false; } - if (Includes) { - // Initialize Includes if provided. - - // FIXME(ioeric): needs more consistent style support in clangd server. - auto Style = format::getStyle("file", Input.FileName, "LLVM", - Input.Contents, Input.VFS.get()); - if (!Style) { - log("Failed to get FormatStyle for file" + Input.FileName + - ". Fall back to use LLVM style. Error: " + - llvm::toString(Style.takeError())); - Style = format::getLLVMStyle(); - } - *Includes = llvm::make_unique( - Input.FileName, Input.Contents, *Style, Input.Command.Directory, - Clang->getPreprocessor().getHeaderSearchInfo()); - for (const auto &Inc : Input.PreambleInclusions) - Includes->get()->addExisting(Inc); - Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback( - Clang->getSourceManager(), [Includes](Inclusion Inc) { - Includes->get()->addExisting(std::move(Inc)); - })); - } + if (Includes) + Clang->getPreprocessor().addPPCallbacks( + collectIncludeStructureCallback(Clang->getSourceManager(), Includes)); if (!Action.Execute()) { log("Execute() failed when running codeComplete for " + Input.FileName); return false; @@ -917,24 +897,21 @@ // - TopN determines the results with the best score. class CodeCompleteFlow { PathRef FileName; + IncludeStructure Includes; // Complete once the compiler runs. const CodeCompleteOptions &Opts; // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup. CompletionRecorder *Recorder = nullptr; int NSema = 0, NIndex = 0, NBoth = 0; // Counters for logging. bool Incomplete = false; // Would more be available with a higher limit? llvm::Optional Filter; // Initialized once Sema runs. - std::unique_ptr Includes; // Initialized once compiler runs. - FileProximityMatcher FileProximityMatch; + llvm::Optional Inserter; // Available during runWithSema. + llvm::Optional FileProximity; // Initialized once Sema runs. public: // A CodeCompleteFlow object is only useful for calling run() exactly once. - CodeCompleteFlow(PathRef FileName, const CodeCompleteOptions &Opts) - : FileName(FileName), Opts(Opts), - // FIXME: also use path of the main header corresponding to FileName to - // calculate the file proximity, which would capture include/ and src/ - // project setup where headers and implementations are not in the same - // directory. - FileProximityMatch(ArrayRef({FileName})) {} + CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes, + const CodeCompleteOptions &Opts) + : FileName(FileName), Includes(Includes), Opts(Opts) {} CompletionList run(const SemaCompleteInput &SemaCCInput) && { trace::Span Tracer("CodeCompleteFlow"); @@ -945,11 +922,35 @@ CompletionList Output; auto RecorderOwner = llvm::make_unique(Opts, [&]() { assert(Recorder && "Recorder is not set"); - assert(Includes && "Includes is not set"); + // FIXME(ioeric): needs more consistent style support in clangd server. + auto Style = + format::getStyle("file", SemaCCInput.FileName, "LLVM", + SemaCCInput.Contents, SemaCCInput.VFS.get()); + if (!Style) { + log("Failed to get FormatStyle for file" + SemaCCInput.FileName + ": " + + llvm::toString(Style.takeError()) + ". Fallback is LLVM style."); + Style = format::getLLVMStyle(); + } // If preprocessor was run, inclusions from preprocessor callback should - // already be added to Inclusions. + // already be added to Includes. + Inserter.emplace( + SemaCCInput.FileName, SemaCCInput.Contents, + format::getLLVMStyle(), // XXX + SemaCCInput.Command.Directory, + Recorder->CCSema->getPreprocessor().getHeaderSearchInfo()); + for (const auto &Inc : Includes.MainFileIncludes) + Inserter->addExisting(Inc); + + FileDistanceOptions ProxOpts{}; // Use defaults. + const auto &SM = Recorder->CCSema->getSourceManager(); + llvm::StringMap ProxRoots = Includes.includeDepth( + SM.getFileEntryForID(SM.getMainFileID())->getName()); + for (auto &Entry : ProxRoots) + Entry.second *= ProxOpts.IncludeCost; + FileProximity.emplace(ProxRoots, ProxOpts); + Output = runWithSema(); - Includes.reset(); // Make sure this doesn't out-live Clang. + Inserter.reset(); // Make sure this doesn't out-live Clang. SPAN_ATTACH(Tracer, "sema_completion_kind", getCompletionKindString(Recorder->CCContext.getKind())); }); @@ -1010,6 +1011,7 @@ Req.RestrictForCodeCompletion = true; Req.Scopes = getQueryScopes(Recorder->CCContext, Recorder->CCSema->getSourceManager()); + // FIXME: we should send multiple weighted paths here. Req.ProximityPaths.push_back(FileName); log(llvm::formatv("Code complete: fuzzyFind(\"{0}\", scopes=[{1}])", Req.Query, @@ -1090,7 +1092,7 @@ SymbolQualitySignals Quality; SymbolRelevanceSignals Relevance; Relevance.Query = SymbolRelevanceSignals::CodeComplete; - Relevance.FileProximityMatch = &FileProximityMatch; + Relevance.FileProximityMatch = FileProximity.getPointer(); auto &First = Bundle.front(); if (auto FuzzyScore = fuzzyScore(First)) Relevance.NameMatch = *FuzzyScore; @@ -1147,7 +1149,7 @@ } return CompletionCandidate::build( Bundle, - Bundle.front().build(FileName, Scores, Opts, SemaCCS, *Includes, + Bundle.front().build(FileName, Scores, Opts, SemaCCS, *Inserter, FrontDocComment), Opts); } @@ -1156,14 +1158,13 @@ CompletionList codeComplete(PathRef FileName, const tooling::CompileCommand &Command, PrecompiledPreamble const *Preamble, - const std::vector &PreambleInclusions, + const IncludeStructure &PreambleInclusions, StringRef Contents, Position Pos, IntrusiveRefCntPtr VFS, std::shared_ptr PCHs, CodeCompleteOptions Opts) { - return CodeCompleteFlow(FileName, Opts) - .run({FileName, Command, Preamble, PreambleInclusions, Contents, Pos, VFS, - PCHs}); + return CodeCompleteFlow(FileName, PreambleInclusions, Opts) + .run({FileName, Command, Preamble, Contents, Pos, VFS, PCHs}); } SignatureHelp signatureHelp(PathRef FileName, @@ -1178,11 +1179,11 @@ Options.IncludeMacros = false; Options.IncludeCodePatterns = false; Options.IncludeBriefComments = false; - std::vector PreambleInclusions = {}; // Unused for signatureHelp + IncludeStructure PreambleInclusions; // Unused for signatureHelp semaCodeComplete(llvm::make_unique(Options, Result), Options, - {FileName, Command, Preamble, PreambleInclusions, Contents, - Pos, std::move(VFS), std::move(PCHs)}); + {FileName, Command, Preamble, Contents, Pos, std::move(VFS), + std::move(PCHs)}); return Result; } Index: clangd/FileDistance.h =================================================================== --- /dev/null +++ clangd/FileDistance.h @@ -0,0 +1,94 @@ +//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This library measures the distance between file paths. +// It's used for ranking symbols, e.g. in code completion. +// |foo/bar.h -> foo/bar.h| = 0. +// |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|. +// This is an edit-distance, where edits go up or down the directory tree. +// It's not symmetrical, the costs of going up and down may not match. +// +// Dealing with multiple roots: +// In practice we care about the distance from a source file, but files near +// its main-header and #included files are considered "close". +// So we start with a set of (root, cost) pairs, and call the distance to a path +// the minimum of `cost + |root -> path|`. +// +// Symbol URI schemes: +// Symbol locations may be represented by URIs rather than file paths directly. +// In this case we want to perform distance computations in URI space rather +// than in file-space, without performing redundant conversions. +// Therefore we have a lookup structure that accepts URIs, so that intermediate +// calculations for the same scheme can be reused. +// +//===----------------------------------------------------------------------===// + +#include "URI.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/StringSaver.h" + +namespace clang { +namespace clangd { + +struct FileDistanceOptions { + unsigned UpCost = 2; // |foo/bar.h -> foo| + unsigned DownCost = 1; // |foo -> foo/bar.h| + unsigned IncludeCost = 2; // |foo.cc -> included_header.h| +}; + +// Supports lookups to find the minimum distance to a file from any root. +// This object should be reused, it memoizes intermediate computations. +class FileDistance { +public: + static constexpr unsigned kUnreachable = std::numeric_limits::max(); + + FileDistance(llvm::StringMap Roots, + const FileDistanceOptions &Opts = {}); + + // Computes the minimum distance from any root to the file path. + unsigned distance(llvm::StringRef Path); + +private: + // Costs computed so far. Always contains roots and their ancestors. + // We store hash codes only. Collisions are rare and consequences aren't dire. + llvm::DenseMap Cache; + FileDistanceOptions Opts; +}; + +// Supports lookups like FileDistance, but the lookup keys are URIs. +// We convert each of the roots to the scheme of the URI and do a FileDistance +// comparison on the bodies. +class URIDistance { +public: + URIDistance(llvm::StringMap Roots, + const FileDistanceOptions &Opts = {}) + : Roots(Roots), Opts(Opts) {} + + // Computes the minimum distance from any root to the URI. + // Only roots that can be mapped into the URI's scheme are considered. + unsigned distance(llvm::StringRef URI); + +private: + // Returns the FileDistance for a URI scheme, creating it if needed. + FileDistance &forScheme(llvm::StringRef Scheme); + + // We cache the results using the original strings so we can skip URI parsing. + llvm::DenseMap Cache; + llvm::StringMap Roots; + llvm::StringMap> ByScheme; + FileDistanceOptions Opts; +}; + +} // namespace clangd +} // namespace clang Index: clangd/FileDistance.cpp =================================================================== --- /dev/null +++ clangd/FileDistance.cpp @@ -0,0 +1,116 @@ +//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "FileDistance.h" +#include "Logger.h" +#define DEBUG_TYPE "FileDistance" + +namespace clang { +namespace clangd { +using namespace llvm; + +// Convert a path into the canonical form. +// Canonical form is either "/", or "/segment" * N: +// C:\foo\bar --> /c:/foo/bar +// /foo/ --> /foo +// a/b/c --> /a/b/c +static SmallString<128> canonicalize(StringRef Path) { + SmallString<128> Result = Path.rtrim('/'); + native(Result, sys::path::Style::posix); + if (Result.empty() || Result.front() != '/') + Result.insert(Result.begin(), '/'); + return Result; +} + +const unsigned FileDistance::kUnreachable; + +FileDistance::FileDistance(StringMap Roots, + const FileDistanceOptions &Opts) + : Opts(Opts) { + for (const auto &R : Roots) { + auto Canonical = canonicalize(R.getKey()); + LLVM_DEBUG(dbgs() << "Root " << Canonical << " = " << R.getValue() << "\n"); + // Walk up to ancestors of this root, assigning cost. + unsigned Cost = R.getValue(); + for (StringRef Rest = Canonical; !Rest.empty(); + Rest = parent_path(Rest, sys::path::Style::posix)) { + auto R = Cache.try_emplace(hash_value(Rest), Cost); + if (!R.second) { + if (Cost < R.first->second) { + R.first->second = Cost; + } else { + // If we're not the best way to get to this path, stop assigning. + break; + } + } + Cost += Opts.UpCost; + } + } +} + +unsigned FileDistance::distance(StringRef Path) { + auto Canonical = canonicalize(Path); + unsigned Cost = kUnreachable; + SmallVector Ancestors; + // Walk up ancestors until we find a path we know the distance for. + for (StringRef Rest = Canonical; !Rest.empty(); + Rest = parent_path(Rest, sys::path::Style::posix)) { + auto Hash = hash_value(Rest); + auto It = Cache.find(Hash); + if (It != Cache.end()) { + Cost = It->second; + break; + } + Ancestors.push_back(Hash); + } + // Fill in all ancestors between the query and the known. + for (unsigned I = 0; I < Ancestors.size(); ++I) { + if (Cost != kUnreachable) + Cost += Opts.DownCost; + auto Hash = Ancestors[Ancestors.size() - 1 - I]; + Cache.try_emplace(Hash, Cost); + } + LLVM_DEBUG(dbgs() << "distance(" << Path << ") = " << Cost << "\n"); + return Cost; +} + +unsigned URIDistance::distance(llvm::StringRef URI) { + auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable); + if (!R.second) + return R.first->getSecond(); + if (auto U = clangd::URI::parse(URI)) { + LLVM_DEBUG(dbgs() << "distance(" << URI << ") = distance(" << U->body() + << ")\n"); + R.first->second = forScheme(U->scheme()).distance(U->body()); + } else { + log("URIDistance::distance() of unparseable " + URI + ": " + + llvm::toString(U.takeError())); + } + return R.first->second; +} + +FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { + auto &Delegate = ByScheme[Scheme]; + if (!Delegate) { + llvm::StringMap SchemeRoots; + for (const auto &Root : Roots) { + if (auto U = clangd::URI::create(Root.getKey(), Scheme)) + SchemeRoots.try_emplace(U->body(), Root.getValue()); + else + consumeError(U.takeError()); + } + LLVM_DEBUG(dbgs() << "FileDistance for scheme " << Scheme << ": " + << SchemeRoots.size() << "/" << Roots.size() + << " roots\n"); + Delegate.reset(new FileDistance(SchemeRoots, Opts)); + } + return *Delegate; +} + +} // namespace clangd +} // namespace clang Index: clangd/Headers.h =================================================================== --- clangd/Headers.h +++ clangd/Headers.h @@ -45,10 +45,43 @@ Path Resolved; // Resolved path of included file. Empty if not resolved. }; +// Information captured about the inclusion graph in a translation unit. +// This includes detailed information about the direct #includes, and summary +// information about all transitive includes. +// +// It should be built incrementally with collectIncludeStructureCallback(). +// When we build the preamble, we capture and store its include structure along +// with the preamble data. When we use the preamble, we can copy its +// IncludeStructure and use another collectIncludeStructureCallback() to fill +// in any non-preamble inclusions. +class IncludeStructure { +public: + // All transitive includes (absolute paths), with their minimum include depth. + // Main file --> 0, #included file --> 1, etc. + std::vector MainFileIncludes; + llvm::StringMap includeDepth(llvm::StringRef MainFileName) const; + + // This updates IncludeDepth(), but not MainFileIncludes. + void recordInclude(llvm::StringRef IncludingName, + llvm::StringRef IncludedName, + llvm::StringRef IncludedRealName); + +private: + // Identifying files in a way that persists from preamble build to subsequent + // builds is surprisingly hard. FileID is unavailable in InclusionDirective(), + // and RealPathName and UniqueID are not preseved in the preamble. + // We use the FileEntry::Name, which is stable. + // The paths we want to expose are the RealPathName, so store those too. + unsigned fileNumber(llvm::StringRef Name); + llvm::StringMap NameToIndex; // Values are file numbers. + std::vector RealPathNames; // Indexed by the file number. + // Maps a file's number to that of the files it includes. + llvm::DenseMap> IncludeChildren; +}; + /// Returns a PPCallback that visits all inclusions in the main file. std::unique_ptr -collectInclusionsInMainFileCallback(const SourceManager &SM, - std::function Callback); +collectIncludeStructureCallback(const SourceManager &SM, IncludeStructure *Out); // Calculates insertion edit for including a new header in a file. class IncludeInserter { Index: clangd/Headers.cpp =================================================================== --- clangd/Headers.cpp +++ clangd/Headers.cpp @@ -23,9 +23,10 @@ class RecordHeaders : public PPCallbacks { public: - RecordHeaders(const SourceManager &SM, - std::function Callback) - : SM(SM), Callback(std::move(Callback)) {} + RecordHeaders(const SourceManager &SM, IncludeStructure *Out) + : SM(SM), Out(Out) { + SM.getMainFileID(); + } // Record existing #includes - both written and resolved paths. Only #includes // in the main file are collected. @@ -36,21 +37,28 @@ llvm::StringRef /*RelativePath*/, const Module * /*Imported*/, SrcMgr::CharacteristicKind /*FileType*/) override { - // Only inclusion directives in the main file make sense. The user cannot - // select directives not in the main file. - if (HashLoc.isInvalid() || !SM.isInMainFile(HashLoc)) - return; - std::string Written = - (IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str(); - std::string Resolved = (!File || File->tryGetRealPathName().empty()) - ? "" - : File->tryGetRealPathName(); - Callback({halfOpenToRange(SM, FilenameRange), Written, Resolved}); + if (SM.isInMainFile(HashLoc)) + Out->MainFileIncludes.push_back({ + halfOpenToRange(SM, FilenameRange), + (IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str(), + File ? File->tryGetRealPathName() : "", + }); + if (File) { + auto *IncludingFileEntry = SM.getFileEntryForID(SM.getFileID(HashLoc)); + if (!IncludingFileEntry) { + assert(SM.getBufferName(HashLoc).startswith("<") && + "Expected #include location to be a file or "); + // Treat as if included from the main file. + IncludingFileEntry = SM.getFileEntryForID(SM.getMainFileID()); + } + Out->recordInclude(IncludingFileEntry->getName(), File->getName(), + File->tryGetRealPathName()); + } } private: const SourceManager &SM; - std::function Callback; + IncludeStructure *Out; }; } // namespace @@ -65,9 +73,56 @@ } std::unique_ptr -collectInclusionsInMainFileCallback(const SourceManager &SM, - std::function Callback) { - return llvm::make_unique(SM, std::move(Callback)); +collectIncludeStructureCallback(const SourceManager &SM, + IncludeStructure *Out) { + return llvm::make_unique(SM, Out); +} + +void IncludeStructure::recordInclude(llvm::StringRef IncludingName, + llvm::StringRef IncludedName, + llvm::StringRef IncludedRealName) { + auto Child = fileNumber(IncludedName); + if (!IncludedRealName.empty() && RealPathNames[Child].empty()) + RealPathNames[Child] = IncludedRealName; + auto Parent = fileNumber(IncludingName); + IncludeChildren[Parent].push_back(Child); +} + +unsigned IncludeStructure::fileNumber(llvm::StringRef Name) { + auto R = NameToIndex.try_emplace(Name, RealPathNames.size()); + if (R.second) + RealPathNames.emplace_back(); + return R.first->getValue(); +} + +llvm::StringMap +IncludeStructure::includeDepth(llvm::StringRef MainFileName) const { + // Include depth 0 is the main file only. + llvm::StringMap Result; + Result[MainFileName] = 0; + std::vector CurrentLevel; + llvm::DenseSet Seen; + auto It = NameToIndex.find(MainFileName); + if (It != NameToIndex.end()) { + CurrentLevel.push_back(It->second); + Seen.insert(It->second); + } + + // Each round of BFS traversal finds the next depth level. + std::vector PreviousLevel; + for (unsigned Level = 1; !CurrentLevel.empty(); ++Level) { + PreviousLevel.clear(); + PreviousLevel.swap(CurrentLevel); + for (const auto &Parent : PreviousLevel) { + for (const auto &Child : IncludeChildren.lookup(Parent)) { + if (Seen.insert(Child).second) { + CurrentLevel.push_back(Child); + Result[RealPathNames[Child]] = Level; + } + } + } + } + return Result; } /// FIXME(ioeric): we might not want to insert an absolute include path if the Index: clangd/Quality.h =================================================================== --- clangd/Quality.h +++ clangd/Quality.h @@ -38,6 +38,7 @@ class CodeCompletionResult; namespace clangd { struct Symbol; +class URIDistance; // Signals structs are designed to be aggregated from 0 or more sources. // A default instance has neutral signals, and sources are merged into it. @@ -69,15 +70,13 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolQualitySignals &); -class FileProximityMatcher; - /// Attributes of a symbol-query pair that affect how much we like it. struct SymbolRelevanceSignals { /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. float NameMatch = 1; bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). - const FileProximityMatcher *FileProximityMatch = nullptr; + URIDistance *FileProximityMatch = nullptr; /// This is used to calculate proximity between the index symbol and the /// query. llvm::StringRef SymbolURI; @@ -111,25 +110,6 @@ /// Combine symbol quality and relevance into a single score. float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); -class FileProximityMatcher { - public: - /// \p ProximityPaths are used to compute proximity scores from symbol's - /// declaring file. The best score will be used. - explicit FileProximityMatcher( - llvm::ArrayRef ProximityPaths); - - /// Calculates the best proximity score from proximity paths to the symbol's - /// URI. Score is [0-1], 1 means \p SymbolURI exactly matches a proximity - /// path. When a path cannot be encoded into the same scheme as \p - /// SymbolURI, the proximity will be 0. - float uriProximity(llvm::StringRef SymbolURI) const; - - private: - llvm::SmallVector ProximityPaths; - friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, - const FileProximityMatcher &); -}; - /// TopN is a lossy container that preserves only the "best" N elements. template > class TopN { public: Index: clangd/Quality.cpp =================================================================== --- clangd/Quality.cpp +++ clangd/Quality.cpp @@ -7,11 +7,12 @@ // //===---------------------------------------------------------------------===// #include "Quality.h" +#include "FileDistance.h" #include "URI.h" #include "index/Index.h" #include "clang/AST/ASTContext.h" -#include "clang/Basic/CharInfo.h" #include "clang/AST/DeclVisitor.h" +#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceManager.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "llvm/Support/FormatVariadic.h" @@ -186,60 +187,6 @@ return OS; } -/// Calculates a proximity score from \p From and \p To, which are URI strings -/// that have the same scheme. This does not parse URI. A URI (sans ":") -/// is split into chunks by '/' and each chunk is considered a file/directory. -/// For example, "uri:///a/b/c" will be treated as /a/b/c -static float uriProximity(StringRef From, StringRef To) { - auto SchemeSplitFrom = From.split(':'); - auto SchemeSplitTo = To.split(':'); - assert((SchemeSplitFrom.first == SchemeSplitTo.first) && - "URIs must have the same scheme in order to compute proximity."); - auto Split = [](StringRef URIWithoutScheme) { - SmallVector Split; - URIWithoutScheme.split(Split, '/', /*MaxSplit=*/-1, /*KeepEmpty=*/false); - return Split; - }; - SmallVector Fs = Split(SchemeSplitFrom.second); - SmallVector Ts = Split(SchemeSplitTo.second); - auto F = Fs.begin(), T = Ts.begin(), FE = Fs.end(), TE = Ts.end(); - for (; F != FE && T != TE && *F == *T; ++F, ++T) { - } - // We penalize for traversing up and down from \p From to \p To but penalize - // less for traversing down because subprojects are more closely related than - // superprojects. - int UpDist = FE - F; - int DownDist = TE - T; - return std::pow(0.7, UpDist + DownDist/2); -} - -FileProximityMatcher::FileProximityMatcher(ArrayRef ProximityPaths) - : ProximityPaths(ProximityPaths.begin(), ProximityPaths.end()) {} - -float FileProximityMatcher::uriProximity(StringRef SymbolURI) const { - float Score = 0; - if (!ProximityPaths.empty() && !SymbolURI.empty()) { - for (const auto &Path : ProximityPaths) - // Only calculate proximity score for two URIs with the same scheme so - // that the computation can be purely text-based and thus avoid expensive - // URI encoding/decoding. - if (auto U = URI::create(Path, SymbolURI.split(':').first)) { - Score = std::max(Score, clangd::uriProximity(U->toString(), SymbolURI)); - } else { - llvm::consumeError(U.takeError()); - } - } - return Score; -} - -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const FileProximityMatcher &M) { - OS << formatv("File proximity matcher: "); - OS << formatv("ProximityPaths{{0}}", llvm::join(M.ProximityPaths.begin(), - M.ProximityPaths.end(), ",")); - return OS; -} - static SymbolRelevanceSignals::AccessibleScope ComputeScope(const NamedDecl &D) { bool InClass = false; @@ -283,6 +230,15 @@ Scope = std::min(Scope, ComputeScope(*SemaCCResult.Declaration)); } +std::pair proximityScore(llvm::StringRef SymbolURI, + URIDistance *D) { + if (!D || SymbolURI.empty()) + return {0.f, 0u}; + unsigned Distance = D->distance(SymbolURI); + // Assume approximately default options are used for sensible scoring. + return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance}; +} + float SymbolRelevanceSignals::evaluate() const { float Score = 1; @@ -291,11 +247,10 @@ Score *= NameMatch; - float IndexProximityScore = - FileProximityMatch ? FileProximityMatch->uriProximity(SymbolURI) : 0; // Proximity scores are [0,1] and we translate them into a multiplier in the - // range from 1 to 2. - Score *= 1 + std::max(IndexProximityScore, SemaProximityScore); + // range from 1 to 3. + Score *= 1 + 2 * std::max(proximityScore(SymbolURI, FileProximityMatch).first, + SemaProximityScore); // Symbols like local variables may only be referenced within their scope. // Conversely if we're in that scope, it's likely we'll reference them. @@ -326,9 +281,9 @@ OS << formatv("\tForbidden: {0}\n", S.Forbidden); OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI); if (S.FileProximityMatch) { - OS << formatv("\tIndex proximity: {0}\n", - S.FileProximityMatch->uriProximity(S.SymbolURI)) - << " (" << *S.FileProximityMatch << ")\n"; + auto Score = proximityScore(S.SymbolURI, S.FileProximityMatch); + OS << formatv("\tIndex proximity: {0} (distance={1})\n", Score.first, + Score.second); } OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore); OS << formatv("\tQuery type: {0}\n", static_cast(S.Query)); Index: clangd/XRefs.cpp =================================================================== --- clangd/XRefs.cpp +++ clangd/XRefs.cpp @@ -229,13 +229,10 @@ std::vector findDefinitions(ParsedAST &AST, Position Pos, const SymbolIndex *Index) { const SourceManager &SourceMgr = AST.getASTContext().getSourceManager(); - SourceLocation SourceLocationBeg = - getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID()); std::vector Result; // Handle goto definition for #include. - for (auto &Inc : AST.getInclusions()) { - Position Pos = sourceLocToPosition(SourceMgr, SourceLocationBeg); + for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) { if (!Inc.Resolved.empty() && Inc.R.contains(Pos)) Result.push_back(Location{URIForFile{Inc.Resolved}, {}}); } @@ -243,6 +240,8 @@ return Result; // Identified symbols at a specific position. + SourceLocation SourceLocationBeg = + getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID()); auto Symbols = getSymbolAtPosition(AST, SourceLocationBeg); for (auto Item : Symbols.Macros) { Index: unittests/clangd/CMakeLists.txt =================================================================== --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -17,6 +17,7 @@ ContextTests.cpp DraftStoreTests.cpp FileIndexTests.cpp + FileDistanceTests.cpp FindSymbolsTests.cpp FuzzyMatchTests.cpp GlobalCompilationDatabaseTests.cpp Index: unittests/clangd/FileDistanceTests.cpp =================================================================== --- /dev/null +++ unittests/clangd/FileDistanceTests.cpp @@ -0,0 +1,60 @@ +//===-- FileDistanceTests.cpp ------------------------*- C++ -*-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "FileDistance.h" +#include "TestFS.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { +namespace { + +TEST(FileDistanceTests, Distance) { + FileDistanceOptions Opts; + Opts.UpCost = 5; + Opts.DownCost = 3; + FileDistance D({{"tools/clang/lib/Format/FormatToken.cpp", 0}, + {"tools/clang/include/clang/Format/FormatToken.h", 0}, + {"include/llvm/ADT/StringRef.h", 2}}, + Opts); + + // Root + EXPECT_EQ(D.distance("tools/clang/lib/Format/FormatToken.cpp"), 0u); + EXPECT_EQ(D.distance("include/llvm/ADT/StringRef.h"), 2u); + // Parent + EXPECT_EQ(D.distance("tools/clang/lib/Format/"), 5u); + // Child + EXPECT_EQ(D.distance("tools/clang/lib/Format/FormatToken.cpp/Oops"), 3u); + // Ancestor (up+up+up+up) + EXPECT_EQ(D.distance("/"), 22u); + // Sibling (up+down) + EXPECT_EQ(D.distance("tools/clang/lib/Format/AnotherFile.cpp"), 8u); + // Cousin (up+up+down+down) + EXPECT_EQ(D.distance("include/llvm/Support/Allocator.h"), 18u); + // First cousin, once removed (up+up+up+down+down) + EXPECT_EQ(D.distance("include/llvm-c/Core.h"), 23u); +} + +auto UseUnittestScheme = UnittestSchemeAnchorSource; + +TEST(FileDistanceTests, URI) { + FileDistanceOptions Opts; + Opts.UpCost = 5; + Opts.DownCost = 3; + + URIDistance D({{testPath("foo"), 1000}, {"/not/a/testpath", 0}}, Opts); + EXPECT_EQ(D.distance("file:///not/a/testpath/either"), 3u); + EXPECT_EQ(D.distance("unittest:foo"), 1000u); + EXPECT_EQ(D.distance("unittest:bar"), 1008u); +} + +} // namespace +} // namespace clangd +} // namespace clang Index: unittests/clangd/HeadersTests.cpp =================================================================== --- unittests/clangd/HeadersTests.cpp +++ unittests/clangd/HeadersTests.cpp @@ -64,18 +64,17 @@ } protected: - std::vector collectIncludes() { + IncludeStructure collectIncludes() { auto Clang = setupClang(); PreprocessOnlyAction Action; EXPECT_TRUE( Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])); - std::vector Inclusions; - Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback( - Clang->getSourceManager(), - [&](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); })); + IncludeStructure Includes; + Clang->getPreprocessor().addPPCallbacks( + collectIncludeStructureCallback(Clang->getSourceManager(), &Includes)); EXPECT_TRUE(Action.Execute()); Action.EndSourceFile(); - return Inclusions; + return Includes; } // Calculates the include path, or returns "" on error or header should not be @@ -133,6 +132,14 @@ MATCHER_P(Written, Name, "") { return arg.Written == Name; } MATCHER_P(Resolved, Name, "") { return arg.Resolved == Name; } +MATCHER_P2(Distance, File, D, "") { + if (arg.getKey() != File) + *result_listener << "file =" << arg.getKey().str(); + if (arg.getValue() != D) + *result_listener << "distance =" << arg.getValue(); + return arg.getKey() == File && arg.getValue() == D; +} + TEST_F(HeadersTest, CollectRewrittenAndResolved) { FS.Files[MainFile] = R"cpp( #include "sub/bar.h" // not shortest @@ -140,9 +147,12 @@ std::string BarHeader = testPath("sub/bar.h"); FS.Files[BarHeader] = ""; - EXPECT_THAT(collectIncludes(), + EXPECT_THAT(collectIncludes().MainFileIncludes, UnorderedElementsAre( AllOf(Written("\"sub/bar.h\""), Resolved(BarHeader)))); + EXPECT_THAT(collectIncludes().includeDepth(MainFile), + UnorderedElementsAre(Distance(MainFile, 0u), + Distance(testPath("sub/bar.h"), 1u))); } TEST_F(HeadersTest, OnlyCollectInclusionsInMain) { @@ -156,8 +166,12 @@ #include "bar.h" )cpp"; EXPECT_THAT( - collectIncludes(), + collectIncludes().MainFileIncludes, UnorderedElementsAre(AllOf(Written("\"bar.h\""), Resolved(BarHeader)))); + EXPECT_THAT(collectIncludes().includeDepth(MainFile), + UnorderedElementsAre(Distance(MainFile, 0u), + Distance(testPath("sub/bar.h"), 1u), + Distance(testPath("sub/baz.h"), 2u))); } TEST_F(HeadersTest, UnResolvedInclusion) { @@ -165,8 +179,10 @@ #include "foo.h" )cpp"; - EXPECT_THAT(collectIncludes(), + EXPECT_THAT(collectIncludes().MainFileIncludes, UnorderedElementsAre(AllOf(Written("\"foo.h\""), Resolved("")))); + EXPECT_THAT(collectIncludes().includeDepth(MainFile), + UnorderedElementsAre(Distance(MainFile, 0u))); } TEST_F(HeadersTest, InsertInclude) { Index: unittests/clangd/QualityTests.cpp =================================================================== --- unittests/clangd/QualityTests.cpp +++ unittests/clangd/QualityTests.cpp @@ -17,6 +17,7 @@ // //===----------------------------------------------------------------------===// +#include "FileDistance.h" #include "Quality.h" #include "TestFS.h" #include "TestTU.h" @@ -157,9 +158,22 @@ PoorNameMatch.NameMatch = 0.2f; EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate()); - SymbolRelevanceSignals WithProximity; - WithProximity.SemaProximityScore = 0.2f; - EXPECT_GT(WithProximity.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithSemaProximity; + WithSemaProximity.SemaProximityScore = 0.2f; + EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate()); + + SymbolRelevanceSignals IndexProximate; + IndexProximate.SymbolURI = "unittest:/foo/bar.h"; + llvm::StringMap ProxRoots; + ProxRoots[testPath("foo/baz.h")] = 0; + URIDistance Distance(ProxRoots); + IndexProximate.FileProximityMatch = &Distance; + EXPECT_GT(IndexProximate.evaluate(), Default.evaluate()); + SymbolRelevanceSignals IndexDistant = IndexProximate; + IndexDistant.SymbolURI = "unittest:/elsewhere/path.h"; + EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate()) + << IndexProximate << IndexDistant; + EXPECT_GT(IndexDistant.evaluate(), Default.evaluate()); SymbolRelevanceSignals Scoped; Scoped.Scope = SymbolRelevanceSignals::FileScope; @@ -180,59 +194,6 @@ EXPECT_LT(sortText(0, "a"), sortText(0, "z")); } -// {a,b,c} becomes /clangd-test/a/b/c -std::string joinPaths(llvm::ArrayRef Parts) { - return testPath( - llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator())); -} - -static constexpr float ProximityBase = 0.7; - -// Calculates a proximity score for an index symbol with declaration file -// SymPath with the given URI scheme. -float URIProximity(const FileProximityMatcher &Matcher, StringRef SymPath, - StringRef Scheme = "file") { - auto U = URI::create(SymPath, Scheme); - EXPECT_TRUE(static_cast(U)) << llvm::toString(U.takeError()); - return Matcher.uriProximity(U->toString()); -} - -TEST(QualityTests, URIProximityScores) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})}); - - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})), - ProximityBase); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "y", "z"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})), - std::pow(ProximityBase, 2)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})), - std::pow(ProximityBase, 6)); - // Note the common directory is /clang-test/ - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})), - std::pow(ProximityBase, 7)); -} - -TEST(QualityTests, URIProximityScoresWithTestURI) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"b", "c", "x"})}); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "c", "x"}), "unittest"), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "y"}), "unittest"), - std::pow(ProximityBase, 2)); - // unittest:///b/c/x vs unittest:///m/n/y. No common directory. - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "y"}), "unittest"), - std::pow(ProximityBase, 4)); -} - } // namespace } // namespace clangd } // namespace clang