Index: clang-tools-extra/trunk/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/trunk/clangd/CMakeLists.txt +++ clang-tools-extra/trunk/clangd/CMakeLists.txt @@ -19,6 +19,7 @@ Diagnostics.cpp DraftStore.cpp FindSymbols.cpp + FileDistance.cpp FuzzyMatch.cpp GlobalCompilationDatabase.cpp Headers.cpp Index: clang-tools-extra/trunk/clangd/ClangdServer.cpp =================================================================== --- clang-tools-extra/trunk/clangd/ClangdServer.cpp +++ clang-tools-extra/trunk/clangd/ClangdServer.cpp @@ -167,7 +167,7 @@ // both the old and the new version in case only one of them matches. CodeCompleteResult Result = clangd::codeComplete( File, IP->Command, PreambleData ? &PreambleData->Preamble : nullptr, - PreambleData ? PreambleData->Inclusions : std::vector(), + PreambleData ? PreambleData->Includes : IncludeStructure(), IP->Contents, Pos, FS, PCHs, CodeCompleteOpts); CB(std::move(Result)); }; Index: clang-tools-extra/trunk/clangd/ClangdUnit.h =================================================================== --- clang-tools-extra/trunk/clangd/ClangdUnit.h +++ clang-tools-extra/trunk/clangd/ClangdUnit.h @@ -45,14 +45,14 @@ // Stores Preamble and associated data. struct PreambleData { PreambleData(PrecompiledPreamble Preamble, std::vector Diags, - std::vector Inclusions); + IncludeStructure Includes); tooling::CompileCommand CompileCommand; PrecompiledPreamble Preamble; std::vector Diags; // Processes like code completions and go-to-definitions will need #include // information, and their compile action skips preamble range. - std::vector Inclusions; + IncludeStructure Includes; }; /// Information required to run clang, e.g. to parse AST or do code completion. @@ -99,14 +99,14 @@ /// Returns the esitmated size of the AST and the accessory structures, in /// bytes. Does not include the size of the preamble. std::size_t getUsedBytes() const; - const std::vector &getInclusions() const; + const IncludeStructure &getIncludeStructure() const; private: ParsedAST(std::shared_ptr Preamble, std::unique_ptr Clang, std::unique_ptr Action, std::vector LocalTopLevelDecls, std::vector Diags, - std::vector Inclusions); + IncludeStructure Includes); // In-memory preambles must outlive the AST, it is important that this member // goes before Clang and Action. @@ -124,7 +124,7 @@ // Top-level decls inside the current file. Not that this does not include // top-level decls from the preamble. std::vector LocalTopLevelDecls; - std::vector Inclusions; + IncludeStructure Includes; }; using PreambleParsedCallback = std::function takeInclusions() { return std::move(Inclusions); } + IncludeStructure takeIncludes() { return std::move(Includes); } void AfterExecute(CompilerInstance &CI) override { if (!ParsedCallback) @@ -103,15 +103,13 @@ std::unique_ptr createPPCallbacks() override { assert(SourceMgr && "SourceMgr must be set at this point"); - return collectInclusionsInMainFileCallback( - *SourceMgr, - [this](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); }); + return collectIncludeStructureCallback(*SourceMgr, &Includes); } private: PathRef File; PreambleParsedCallback ParsedCallback; - std::vector Inclusions; + IncludeStructure Includes; SourceManager *SourceMgr = nullptr; }; @@ -153,15 +151,11 @@ return llvm::None; } - std::vector Inclusions; // Copy over the includes from the preamble, then combine with the // non-preamble includes below. - if (Preamble) - Inclusions = Preamble->Inclusions; - - Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback( - Clang->getSourceManager(), - [&Inclusions](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); })); + auto Includes = Preamble ? Preamble->Includes : IncludeStructure{}; + Clang->getPreprocessor().addPPCallbacks( + collectIncludeStructureCallback(Clang->getSourceManager(), &Includes)); if (!Action->Execute()) log("Execute() failed when building AST for " + MainInput.getFile()); @@ -179,7 +173,7 @@ Diags.insert(Diags.begin(), Preamble->Diags.begin(), Preamble->Diags.end()); return ParsedAST(std::move(Preamble), std::move(Clang), std::move(Action), std::move(ParsedDecls), std::move(Diags), - std::move(Inclusions)); + std::move(Includes)); } ParsedAST::ParsedAST(ParsedAST &&Other) = default; @@ -246,25 +240,24 @@ return Total; } -const std::vector &ParsedAST::getInclusions() const { - return Inclusions; +const IncludeStructure &ParsedAST::getIncludeStructure() const { + return Includes; } PreambleData::PreambleData(PrecompiledPreamble Preamble, - std::vector Diags, - std::vector Inclusions) + std::vector Diags, IncludeStructure Includes) : Preamble(std::move(Preamble)), Diags(std::move(Diags)), - Inclusions(std::move(Inclusions)) {} + Includes(std::move(Includes)) {} ParsedAST::ParsedAST(std::shared_ptr Preamble, std::unique_ptr Clang, std::unique_ptr Action, std::vector LocalTopLevelDecls, - std::vector Diags, std::vector Inclusions) + std::vector Diags, IncludeStructure Includes) : Preamble(std::move(Preamble)), Clang(std::move(Clang)), Action(std::move(Action)), Diags(std::move(Diags)), LocalTopLevelDecls(std::move(LocalTopLevelDecls)), - Inclusions(std::move(Inclusions)) { + Includes(std::move(Includes)) { assert(this->Clang); assert(this->Action); } @@ -350,7 +343,7 @@ " for file " + Twine(FileName)); return std::make_shared( std::move(*BuiltPreamble), PreambleDiagnostics.take(), - SerializedDeclsCollector.takeInclusions()); + SerializedDeclsCollector.takeIncludes()); } else { log("Could not build a preamble for file " + Twine(FileName)); return nullptr; Index: clang-tools-extra/trunk/clangd/CodeComplete.h =================================================================== --- clang-tools-extra/trunk/clangd/CodeComplete.h +++ clang-tools-extra/trunk/clangd/CodeComplete.h @@ -144,12 +144,14 @@ raw_ostream &operator<<(raw_ostream &, const CodeCompleteResult &); /// Get code completions at a specified \p Pos in \p FileName. -CodeCompleteResult codeComplete( - PathRef FileName, const tooling::CompileCommand &Command, - PrecompiledPreamble const *Preamble, - const std::vector &PreambleInclusions, StringRef Contents, - Position Pos, IntrusiveRefCntPtr VFS, - std::shared_ptr PCHs, CodeCompleteOptions Opts); +CodeCompleteResult codeComplete(PathRef FileName, + const tooling::CompileCommand &Command, + PrecompiledPreamble const *Preamble, + const IncludeStructure &PreambleInclusions, + StringRef Contents, Position Pos, + IntrusiveRefCntPtr VFS, + std::shared_ptr PCHs, + CodeCompleteOptions Opts); /// Get signature help at a specified \p Pos in \p FileName. SignatureHelp signatureHelp(PathRef FileName, Index: clang-tools-extra/trunk/clangd/CodeComplete.cpp =================================================================== --- clang-tools-extra/trunk/clangd/CodeComplete.cpp +++ clang-tools-extra/trunk/clangd/CodeComplete.cpp @@ -22,6 +22,7 @@ #include "AST.h" #include "CodeCompletionStrings.h" #include "Compiler.h" +#include "FileDistance.h" #include "FuzzyMatch.h" #include "Headers.h" #include "Logger.h" @@ -763,7 +764,6 @@ PathRef FileName; const tooling::CompileCommand &Command; PrecompiledPreamble const *Preamble; - const std::vector &PreambleInclusions; StringRef Contents; Position Pos; IntrusiveRefCntPtr VFS; @@ -771,12 +771,11 @@ }; // Invokes Sema code completion on a file. -// If \p Includes is set, it will be initialized after a compiler instance has -// been set up. +// If \p Includes is set, it will be updated based on the compiler invocation. bool semaCodeComplete(std::unique_ptr Consumer, const clang::CodeCompleteOptions &Options, const SemaCompleteInput &Input, - std::unique_ptr *Includes = nullptr) { + IncludeStructure *Includes = nullptr) { trace::Span Tracer("Sema completion"); std::vector ArgStrs; for (const auto &S : Input.Command.CommandLine) @@ -837,29 +836,9 @@ Input.FileName); return false; } - if (Includes) { - // Initialize Includes if provided. - - // FIXME(ioeric): needs more consistent style support in clangd server. - auto Style = format::getStyle(format::DefaultFormatStyle, Input.FileName, - format::DefaultFallbackStyle, Input.Contents, - Input.VFS.get()); - if (!Style) { - log("ERROR: failed to get FormatStyle for file " + Input.FileName + - ". Fall back to use LLVM style. Error: " + - llvm::toString(Style.takeError())); - Style = format::getLLVMStyle(); - } - *Includes = llvm::make_unique( - Input.FileName, Input.Contents, *Style, Input.Command.Directory, - Clang->getPreprocessor().getHeaderSearchInfo()); - for (const auto &Inc : Input.PreambleInclusions) - Includes->get()->addExisting(Inc); - Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback( - Clang->getSourceManager(), [Includes](Inclusion Inc) { - Includes->get()->addExisting(std::move(Inc)); - })); - } + if (Includes) + Clang->getPreprocessor().addPPCallbacks( + collectIncludeStructureCallback(Clang->getSourceManager(), Includes)); if (!Action.Execute()) { log("Execute() failed when running codeComplete for " + Input.FileName); return false; @@ -949,24 +928,23 @@ // - TopN determines the results with the best score. class CodeCompleteFlow { PathRef FileName; + IncludeStructure Includes; // Complete once the compiler runs. const CodeCompleteOptions &Opts; // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup. CompletionRecorder *Recorder = nullptr; int NSema = 0, NIndex = 0, NBoth = 0; // Counters for logging. bool Incomplete = false; // Would more be available with a higher limit? llvm::Optional Filter; // Initialized once Sema runs. - std::unique_ptr Includes; // Initialized once compiler runs. - FileProximityMatcher FileProximityMatch; + // Include-insertion and proximity scoring rely on the include structure. + // This is available after Sema has run. + llvm::Optional Inserter; // Available during runWithSema. + llvm::Optional FileProximity; // Initialized once Sema runs. public: // A CodeCompleteFlow object is only useful for calling run() exactly once. - CodeCompleteFlow(PathRef FileName, const CodeCompleteOptions &Opts) - : FileName(FileName), Opts(Opts), - // FIXME: also use path of the main header corresponding to FileName to - // calculate the file proximity, which would capture include/ and src/ - // project setup where headers and implementations are not in the same - // directory. - FileProximityMatch(ArrayRef({FileName})) {} + CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes, + const CodeCompleteOptions &Opts) + : FileName(FileName), Includes(Includes), Opts(Opts) {} CodeCompleteResult run(const SemaCompleteInput &SemaCCInput) && { trace::Span Tracer("CodeCompleteFlow"); @@ -977,11 +955,45 @@ CodeCompleteResult Output; auto RecorderOwner = llvm::make_unique(Opts, [&]() { assert(Recorder && "Recorder is not set"); - assert(Includes && "Includes is not set"); + // FIXME(ioeric): needs more consistent style support in clangd server. + auto Style = + format::getStyle("file", SemaCCInput.FileName, "LLVM", + SemaCCInput.Contents, SemaCCInput.VFS.get()); + if (!Style) { + log("Failed to get FormatStyle for file" + SemaCCInput.FileName + ": " + + llvm::toString(Style.takeError()) + ". Fallback is LLVM style."); + Style = format::getLLVMStyle(); + } // If preprocessor was run, inclusions from preprocessor callback should - // already be added to Inclusions. + // already be added to Includes. + Inserter.emplace( + SemaCCInput.FileName, SemaCCInput.Contents, *Style, + SemaCCInput.Command.Directory, + Recorder->CCSema->getPreprocessor().getHeaderSearchInfo()); + for (const auto &Inc : Includes.MainFileIncludes) + Inserter->addExisting(Inc); + + // Most of the cost of file proximity is in initializing the FileDistance + // structures based on the observed includes, once per query. Conceptually + // that happens here (though the per-URI-scheme initialization is lazy). + // The per-result proximity scoring is (amortized) very cheap. + FileDistanceOptions ProxOpts{}; // Use defaults. + const auto &SM = Recorder->CCSema->getSourceManager(); + llvm::StringMap ProxSources; + for (auto &Entry : Includes.includeDepth( + SM.getFileEntryForID(SM.getMainFileID())->getName())) { + auto &Source = ProxSources[Entry.getKey()]; + Source.Cost = Entry.getValue() * ProxOpts.IncludeCost; + // Symbols near our transitive includes are good, but only consider + // things in the same directory or below it. Otherwise there can be + // many false positives. + if (Entry.getValue() > 0) + Source.MaxUpTraversals = 1; + } + FileProximity.emplace(ProxSources, ProxOpts); + Output = runWithSema(); - Includes.reset(); // Make sure this doesn't out-live Clang. + Inserter.reset(); // Make sure this doesn't out-live Clang. SPAN_ATTACH(Tracer, "sema_completion_kind", getCompletionKindString(Recorder->CCContext.getKind())); }); @@ -1044,6 +1056,7 @@ Req.RestrictForCodeCompletion = true; Req.Scopes = getQueryScopes(Recorder->CCContext, Recorder->CCSema->getSourceManager()); + // FIXME: we should send multiple weighted paths here. Req.ProximityPaths.push_back(FileName); log(llvm::formatv("Code complete: fuzzyFind(\"{0}\", scopes=[{1}])", Req.Query, @@ -1124,7 +1137,7 @@ SymbolQualitySignals Quality; SymbolRelevanceSignals Relevance; Relevance.Query = SymbolRelevanceSignals::CodeComplete; - Relevance.FileProximityMatch = &FileProximityMatch; + Relevance.FileProximityMatch = FileProximity.getPointer(); auto &First = Bundle.front(); if (auto FuzzyScore = fuzzyScore(First)) Relevance.NameMatch = *FuzzyScore; @@ -1174,7 +1187,7 @@ : nullptr; if (!Builder) Builder.emplace(Recorder->CCSema->getASTContext(), Item, SemaCCS, - *Includes, FileName, Opts); + *Inserter, FileName, Opts); else Builder->add(Item, SemaCCS); } @@ -1182,15 +1195,16 @@ } }; -CodeCompleteResult codeComplete( - PathRef FileName, const tooling::CompileCommand &Command, - PrecompiledPreamble const *Preamble, - const std::vector &PreambleInclusions, StringRef Contents, - Position Pos, IntrusiveRefCntPtr VFS, - std::shared_ptr PCHs, CodeCompleteOptions Opts) { - return CodeCompleteFlow(FileName, Opts) - .run({FileName, Command, Preamble, PreambleInclusions, Contents, Pos, VFS, - PCHs}); +CodeCompleteResult codeComplete(PathRef FileName, + const tooling::CompileCommand &Command, + PrecompiledPreamble const *Preamble, + const IncludeStructure &PreambleInclusions, + StringRef Contents, Position Pos, + IntrusiveRefCntPtr VFS, + std::shared_ptr PCHs, + CodeCompleteOptions Opts) { + return CodeCompleteFlow(FileName, PreambleInclusions, Opts) + .run({FileName, Command, Preamble, Contents, Pos, VFS, PCHs}); } SignatureHelp signatureHelp(PathRef FileName, @@ -1205,11 +1219,11 @@ Options.IncludeMacros = false; Options.IncludeCodePatterns = false; Options.IncludeBriefComments = false; - std::vector PreambleInclusions = {}; // Unused for signatureHelp + IncludeStructure PreambleInclusions; // Unused for signatureHelp semaCodeComplete(llvm::make_unique(Options, Result), Options, - {FileName, Command, Preamble, PreambleInclusions, Contents, - Pos, std::move(VFS), std::move(PCHs)}); + {FileName, Command, Preamble, Contents, Pos, std::move(VFS), + std::move(PCHs)}); return Result; } Index: clang-tools-extra/trunk/clangd/FileDistance.h =================================================================== --- clang-tools-extra/trunk/clangd/FileDistance.h +++ clang-tools-extra/trunk/clangd/FileDistance.h @@ -0,0 +1,109 @@ +//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This library measures the distance between file paths. +// It's used for ranking symbols, e.g. in code completion. +// |foo/bar.h -> foo/bar.h| = 0. +// |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|. +// This is an edit-distance, where edits go up or down the directory tree. +// It's not symmetrical, the costs of going up and down may not match. +// +// Dealing with multiple sources: +// In practice we care about the distance from a source file, but files near +// its main-header and #included files are considered "close". +// So we start with a set of (anchor, cost) pairs, and call the distance to a +// path the minimum of `cost + |source -> path|`. +// +// We allow each source to limit the number of up-traversals paths may start +// with. Up-traversals may reach things that are not "semantically near". +// +// Symbol URI schemes: +// Symbol locations may be represented by URIs rather than file paths directly. +// In this case we want to perform distance computations in URI space rather +// than in file-space, without performing redundant conversions. +// Therefore we have a lookup structure that accepts URIs, so that intermediate +// calculations for the same scheme can be reused. +// +// Caveats: +// Assuming up and down traversals each have uniform costs is simplistic. +// Often there are "semantic roots" whose children are almost unrelated. +// (e.g. /usr/include/, or / in an umbrella repository). We ignore this. +// +//===----------------------------------------------------------------------===// + +#include "URI.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/StringSaver.h" + +namespace clang { +namespace clangd { + +struct FileDistanceOptions { + unsigned UpCost = 2; // |foo/bar.h -> foo| + unsigned DownCost = 1; // |foo -> foo/bar.h| + unsigned IncludeCost = 2; // |foo.cc -> included_header.h| +}; + +struct SourceParams { + // Base cost for paths starting at this source. + unsigned Cost = 0; + // Limits the number of upwards traversals allowed from this source. + unsigned MaxUpTraversals = std::numeric_limits::max(); +}; + +// Supports lookups to find the minimum distance to a file from any source. +// This object should be reused, it memoizes intermediate computations. +class FileDistance { +public: + static constexpr unsigned kUnreachable = std::numeric_limits::max(); + + FileDistance(llvm::StringMap Sources, + const FileDistanceOptions &Opts = {}); + + // Computes the minimum distance from any source to the file path. + unsigned distance(llvm::StringRef Path); + +private: + // Costs computed so far. Always contains sources and their ancestors. + // We store hash codes only. Collisions are rare and consequences aren't dire. + llvm::DenseMap Cache; + FileDistanceOptions Opts; +}; + +// Supports lookups like FileDistance, but the lookup keys are URIs. +// We convert each of the sources to the scheme of the URI and do a FileDistance +// comparison on the bodies. +class URIDistance { +public: + URIDistance(llvm::StringMap Sources, + const FileDistanceOptions &Opts = {}) + : Sources(Sources), Opts(Opts) {} + + // Computes the minimum distance from any source to the URI. + // Only sources that can be mapped into the URI's scheme are considered. + unsigned distance(llvm::StringRef URI); + +private: + // Returns the FileDistance for a URI scheme, creating it if needed. + FileDistance &forScheme(llvm::StringRef Scheme); + + // We cache the results using the original strings so we can skip URI parsing. + llvm::DenseMap Cache; + llvm::StringMap Sources; + llvm::StringMap> ByScheme; + FileDistanceOptions Opts; +}; + +} // namespace clangd +} // namespace clang Index: clang-tools-extra/trunk/clangd/FileDistance.cpp =================================================================== --- clang-tools-extra/trunk/clangd/FileDistance.cpp +++ clang-tools-extra/trunk/clangd/FileDistance.cpp @@ -0,0 +1,173 @@ +//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The FileDistance structure allows calculating the minimum distance to paths +// in a single tree. +// We simply walk up the path's ancestors until we find a node whose cost is +// known, and add the cost of walking back down. Initialization ensures this +// gives the correct path to the roots. +// We cache the results, so that the runtime is O(|A|), where A is the set of +// all distinct ancestors of visited paths. +// +// Example after initialization with /=2, /bar=0, DownCost = 1: +// / = 2 +// /bar = 0 +// +// After querying /foo/bar and /bar/foo: +// / = 2 +// /bar = 0 +// /bar/foo = 1 +// /foo = 3 +// /foo/bar = 4 +// +// URIDistance creates FileDistance lazily for each URI scheme encountered. In +// practice this is a small constant factor. +// +//===-------------------------------------------------------------------------// + +#include "FileDistance.h" +#include "Logger.h" +#include "llvm/ADT/STLExtras.h" +#include +#define DEBUG_TYPE "FileDistance" + +namespace clang { +namespace clangd { +using namespace llvm; + +// Convert a path into the canonical form. +// Canonical form is either "/", or "/segment" * N: +// C:\foo\bar --> /c:/foo/bar +// /foo/ --> /foo +// a/b/c --> /a/b/c +static SmallString<128> canonicalize(StringRef Path) { + SmallString<128> Result = Path.rtrim('/'); + native(Result, sys::path::Style::posix); + if (Result.empty() || Result.front() != '/') + Result.insert(Result.begin(), '/'); + return Result; +} + +const unsigned FileDistance::kUnreachable; + +FileDistance::FileDistance(StringMap Sources, + const FileDistanceOptions &Opts) + : Opts(Opts) { + llvm::DenseMap> DownEdges; + // Compute the best distance following only up edges. + // Keep track of down edges, in case we can use them to improve on this. + for (const auto &S : Sources) { + auto Canonical = canonicalize(S.getKey()); + LLVM_DEBUG(dbgs() << "Source " << Canonical << " = " << S.second.Cost + << ", MaxUp=" << S.second.MaxUpTraversals << "\n"); + // Walk up to ancestors of this source, assigning cost. + StringRef Rest = Canonical; + llvm::hash_code Hash = hash_value(Rest); + for (unsigned I = 0; !Rest.empty(); ++I) { + Rest = parent_path(Rest, sys::path::Style::posix); + auto NextHash = hash_value(Rest); + DownEdges[NextHash].push_back(Hash); + // We can't just break after MaxUpTraversals, must still set DownEdges. + if (I > S.getValue().MaxUpTraversals) { + if (Cache.find(Hash) != Cache.end()) + break; + } else { + unsigned Cost = S.getValue().Cost + I * Opts.UpCost; + auto R = Cache.try_emplace(Hash, Cost); + if (!R.second) { + if (Cost < R.first->second) { + R.first->second = Cost; + } else { + // If we're not the best way to get to this path, stop assigning. + break; + } + } + } + Hash = NextHash; + } + } + // Now propagate scores parent -> child if that's an improvement. + // BFS ensures we propagate down chains (must visit parents before children). + std::queue Next; + for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef("")))) + Next.push(Child); + while (!Next.empty()) { + auto ParentCost = Cache.lookup(Next.front()); + for (auto Child : DownEdges.lookup(Next.front())) { + auto &ChildCost = + Cache.try_emplace(Child, kUnreachable).first->getSecond(); + if (ParentCost + Opts.DownCost < ChildCost) + ChildCost = ParentCost + Opts.DownCost; + Next.push(Child); + } + Next.pop(); + } +} + +unsigned FileDistance::distance(StringRef Path) { + auto Canonical = canonicalize(Path); + unsigned Cost = kUnreachable; + SmallVector Ancestors; + // Walk up ancestors until we find a path we know the distance for. + for (StringRef Rest = Canonical; !Rest.empty(); + Rest = parent_path(Rest, sys::path::Style::posix)) { + auto Hash = hash_value(Rest); + auto It = Cache.find(Hash); + if (It != Cache.end()) { + Cost = It->second; + break; + } + Ancestors.push_back(Hash); + } + // Now we know the costs for (known node, queried node]. + // Fill these in, walking down the directory tree. + for (hash_code Hash : reverse(Ancestors)) { + if (Cost != kUnreachable) + Cost += Opts.DownCost; + Cache.try_emplace(Hash, Cost); + } + LLVM_DEBUG(dbgs() << "distance(" << Path << ") = " << Cost << "\n"); + return Cost; +} + +unsigned URIDistance::distance(llvm::StringRef URI) { + auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable); + if (!R.second) + return R.first->getSecond(); + if (auto U = clangd::URI::parse(URI)) { + LLVM_DEBUG(dbgs() << "distance(" << URI << ") = distance(" << U->body() + << ")\n"); + R.first->second = forScheme(U->scheme()).distance(U->body()); + } else { + log("URIDistance::distance() of unparseable " + URI + ": " + + llvm::toString(U.takeError())); + } + return R.first->second; +} + +FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { + auto &Delegate = ByScheme[Scheme]; + if (!Delegate) { + llvm::StringMap SchemeSources; + for (const auto &Source : Sources) { + if (auto U = clangd::URI::create(Source.getKey(), Scheme)) + SchemeSources.try_emplace(U->body(), Source.getValue()); + else + consumeError(U.takeError()); + } + LLVM_DEBUG(dbgs() << "FileDistance for scheme " << Scheme << ": " + << SchemeSources.size() << "/" << Sources.size() + << " sources\n"); + Delegate.reset(new FileDistance(std::move(SchemeSources), Opts)); + } + return *Delegate; +} + +} // namespace clangd +} // namespace clang Index: clang-tools-extra/trunk/clangd/Headers.h =================================================================== --- clang-tools-extra/trunk/clangd/Headers.h +++ clang-tools-extra/trunk/clangd/Headers.h @@ -45,10 +45,47 @@ Path Resolved; // Resolved path of included file. Empty if not resolved. }; +// Information captured about the inclusion graph in a translation unit. +// This includes detailed information about the direct #includes, and summary +// information about all transitive includes. +// +// It should be built incrementally with collectIncludeStructureCallback(). +// When we build the preamble, we capture and store its include structure along +// with the preamble data. When we use the preamble, we can copy its +// IncludeStructure and use another collectIncludeStructureCallback() to fill +// in any non-preamble inclusions. +class IncludeStructure { +public: + std::vector MainFileIncludes; + + // Return all transitively reachable files, and their minimum include depth. + // All transitive includes (absolute paths), with their minimum include depth. + // Root --> 0, #included file --> 1, etc. + // Root is clang's name for a file, which may not be absolute. + // Usually it should be SM.getFileEntryForID(SM.getMainFileID())->getName(). + llvm::StringMap includeDepth(llvm::StringRef Root) const; + + // This updates IncludeDepth(), but not MainFileIncludes. + void recordInclude(llvm::StringRef IncludingName, + llvm::StringRef IncludedName, + llvm::StringRef IncludedRealName); + +private: + // Identifying files in a way that persists from preamble build to subsequent + // builds is surprisingly hard. FileID is unavailable in InclusionDirective(), + // and RealPathName and UniqueID are not preseved in the preamble. + // We use the FileEntry::Name, which is stable, interned into a "file index". + // The paths we want to expose are the RealPathName, so store those too. + std::vector RealPathNames; // In file index order. + unsigned fileIndex(llvm::StringRef Name); + llvm::StringMap NameToIndex; // Values are file indexes. + // Maps a file's index to that of the files it includes. + llvm::DenseMap> IncludeChildren; +}; + /// Returns a PPCallback that visits all inclusions in the main file. std::unique_ptr -collectInclusionsInMainFileCallback(const SourceManager &SM, - std::function Callback); +collectIncludeStructureCallback(const SourceManager &SM, IncludeStructure *Out); // Calculates insertion edit for including a new header in a file. class IncludeInserter { Index: clang-tools-extra/trunk/clangd/Headers.cpp =================================================================== --- clang-tools-extra/trunk/clangd/Headers.cpp +++ clang-tools-extra/trunk/clangd/Headers.cpp @@ -23,9 +23,8 @@ class RecordHeaders : public PPCallbacks { public: - RecordHeaders(const SourceManager &SM, - std::function Callback) - : SM(SM), Callback(std::move(Callback)) {} + RecordHeaders(const SourceManager &SM, IncludeStructure *Out) + : SM(SM), Out(Out) {} // Record existing #includes - both written and resolved paths. Only #includes // in the main file are collected. @@ -36,21 +35,28 @@ llvm::StringRef /*RelativePath*/, const Module * /*Imported*/, SrcMgr::CharacteristicKind /*FileType*/) override { - // Only inclusion directives in the main file make sense. The user cannot - // select directives not in the main file. - if (HashLoc.isInvalid() || !SM.isInMainFile(HashLoc)) - return; - std::string Written = - (IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str(); - std::string Resolved = (!File || File->tryGetRealPathName().empty()) - ? "" - : File->tryGetRealPathName(); - Callback({halfOpenToRange(SM, FilenameRange), Written, Resolved}); + if (SM.isInMainFile(HashLoc)) + Out->MainFileIncludes.push_back({ + halfOpenToRange(SM, FilenameRange), + (IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str(), + File ? File->tryGetRealPathName() : "", + }); + if (File) { + auto *IncludingFileEntry = SM.getFileEntryForID(SM.getFileID(HashLoc)); + if (!IncludingFileEntry) { + assert(SM.getBufferName(HashLoc).startswith("<") && + "Expected #include location to be a file or "); + // Treat as if included from the main file. + IncludingFileEntry = SM.getFileEntryForID(SM.getMainFileID()); + } + Out->recordInclude(IncludingFileEntry->getName(), File->getName(), + File->tryGetRealPathName()); + } } private: const SourceManager &SM; - std::function Callback; + IncludeStructure *Out; }; } // namespace @@ -65,9 +71,59 @@ } std::unique_ptr -collectInclusionsInMainFileCallback(const SourceManager &SM, - std::function Callback) { - return llvm::make_unique(SM, std::move(Callback)); +collectIncludeStructureCallback(const SourceManager &SM, + IncludeStructure *Out) { + return llvm::make_unique(SM, Out); +} + +void IncludeStructure::recordInclude(llvm::StringRef IncludingName, + llvm::StringRef IncludedName, + llvm::StringRef IncludedRealName) { + auto Child = fileIndex(IncludedName); + if (!IncludedRealName.empty() && RealPathNames[Child].empty()) + RealPathNames[Child] = IncludedRealName; + auto Parent = fileIndex(IncludingName); + IncludeChildren[Parent].push_back(Child); +} + +unsigned IncludeStructure::fileIndex(llvm::StringRef Name) { + auto R = NameToIndex.try_emplace(Name, RealPathNames.size()); + if (R.second) + RealPathNames.emplace_back(); + return R.first->getValue(); +} + +llvm::StringMap +IncludeStructure::includeDepth(llvm::StringRef Root) const { + // Include depth 0 is the main file only. + llvm::StringMap Result; + Result[Root] = 0; + std::vector CurrentLevel; + llvm::DenseSet Seen; + auto It = NameToIndex.find(Root); + if (It != NameToIndex.end()) { + CurrentLevel.push_back(It->second); + Seen.insert(It->second); + } + + // Each round of BFS traversal finds the next depth level. + std::vector PreviousLevel; + for (unsigned Level = 1; !CurrentLevel.empty(); ++Level) { + PreviousLevel.clear(); + PreviousLevel.swap(CurrentLevel); + for (const auto &Parent : PreviousLevel) { + for (const auto &Child : IncludeChildren.lookup(Parent)) { + if (Seen.insert(Child).second) { + CurrentLevel.push_back(Child); + const auto &Name = RealPathNames[Child]; + // Can't include files if we don't have their real path. + if (!Name.empty()) + Result[Name] = Level; + } + } + } + } + return Result; } /// FIXME(ioeric): we might not want to insert an absolute include path if the Index: clang-tools-extra/trunk/clangd/Quality.h =================================================================== --- clang-tools-extra/trunk/clangd/Quality.h +++ clang-tools-extra/trunk/clangd/Quality.h @@ -38,6 +38,7 @@ class CodeCompletionResult; namespace clangd { struct Symbol; +class URIDistance; // Signals structs are designed to be aggregated from 0 or more sources. // A default instance has neutral signals, and sources are merged into it. @@ -69,15 +70,13 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolQualitySignals &); -class FileProximityMatcher; - /// Attributes of a symbol-query pair that affect how much we like it. struct SymbolRelevanceSignals { /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. float NameMatch = 1; bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). - const FileProximityMatcher *FileProximityMatch = nullptr; + URIDistance *FileProximityMatch = nullptr; /// This is used to calculate proximity between the index symbol and the /// query. llvm::StringRef SymbolURI; @@ -111,25 +110,6 @@ /// Combine symbol quality and relevance into a single score. float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); -class FileProximityMatcher { - public: - /// \p ProximityPaths are used to compute proximity scores from symbol's - /// declaring file. The best score will be used. - explicit FileProximityMatcher( - llvm::ArrayRef ProximityPaths); - - /// Calculates the best proximity score from proximity paths to the symbol's - /// URI. Score is [0-1], 1 means \p SymbolURI exactly matches a proximity - /// path. When a path cannot be encoded into the same scheme as \p - /// SymbolURI, the proximity will be 0. - float uriProximity(llvm::StringRef SymbolURI) const; - - private: - llvm::SmallVector ProximityPaths; - friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, - const FileProximityMatcher &); -}; - /// TopN is a lossy container that preserves only the "best" N elements. template > class TopN { public: Index: clang-tools-extra/trunk/clangd/Quality.cpp =================================================================== --- clang-tools-extra/trunk/clangd/Quality.cpp +++ clang-tools-extra/trunk/clangd/Quality.cpp @@ -7,17 +7,18 @@ // //===---------------------------------------------------------------------===// #include "Quality.h" -#include +#include "FileDistance.h" #include "URI.h" #include "index/Index.h" #include "clang/AST/ASTContext.h" -#include "clang/Basic/CharInfo.h" #include "clang/AST/DeclVisitor.h" +#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceManager.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include namespace clang { namespace clangd { @@ -187,60 +188,6 @@ return OS; } -/// Calculates a proximity score from \p From and \p To, which are URI strings -/// that have the same scheme. This does not parse URI. A URI (sans ":") -/// is split into chunks by '/' and each chunk is considered a file/directory. -/// For example, "uri:///a/b/c" will be treated as /a/b/c -static float uriProximity(StringRef From, StringRef To) { - auto SchemeSplitFrom = From.split(':'); - auto SchemeSplitTo = To.split(':'); - assert((SchemeSplitFrom.first == SchemeSplitTo.first) && - "URIs must have the same scheme in order to compute proximity."); - auto Split = [](StringRef URIWithoutScheme) { - SmallVector Split; - URIWithoutScheme.split(Split, '/', /*MaxSplit=*/-1, /*KeepEmpty=*/false); - return Split; - }; - SmallVector Fs = Split(SchemeSplitFrom.second); - SmallVector Ts = Split(SchemeSplitTo.second); - auto F = Fs.begin(), T = Ts.begin(), FE = Fs.end(), TE = Ts.end(); - for (; F != FE && T != TE && *F == *T; ++F, ++T) { - } - // We penalize for traversing up and down from \p From to \p To but penalize - // less for traversing down because subprojects are more closely related than - // superprojects. - int UpDist = FE - F; - int DownDist = TE - T; - return std::pow(0.7, UpDist + DownDist/2); -} - -FileProximityMatcher::FileProximityMatcher(ArrayRef ProximityPaths) - : ProximityPaths(ProximityPaths.begin(), ProximityPaths.end()) {} - -float FileProximityMatcher::uriProximity(StringRef SymbolURI) const { - float Score = 0; - if (!ProximityPaths.empty() && !SymbolURI.empty()) { - for (const auto &Path : ProximityPaths) - // Only calculate proximity score for two URIs with the same scheme so - // that the computation can be purely text-based and thus avoid expensive - // URI encoding/decoding. - if (auto U = URI::create(Path, SymbolURI.split(':').first)) { - Score = std::max(Score, clangd::uriProximity(U->toString(), SymbolURI)); - } else { - llvm::consumeError(U.takeError()); - } - } - return Score; -} - -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const FileProximityMatcher &M) { - OS << formatv("File proximity matcher: "); - OS << formatv("ProximityPaths[{0}]", llvm::join(M.ProximityPaths.begin(), - M.ProximityPaths.end(), ",")); - return OS; -} - static SymbolRelevanceSignals::AccessibleScope ComputeScope(const NamedDecl *D) { // Injected "Foo" within the class "Foo" has file scope, not class scope. @@ -288,6 +235,15 @@ Scope = std::min(Scope, ComputeScope(SemaCCResult.Declaration)); } +static std::pair proximityScore(llvm::StringRef SymbolURI, + URIDistance *D) { + if (!D || SymbolURI.empty()) + return {0.f, 0u}; + unsigned Distance = D->distance(SymbolURI); + // Assume approximately default options are used for sensible scoring. + return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance}; +} + float SymbolRelevanceSignals::evaluate() const { float Score = 1; @@ -296,11 +252,10 @@ Score *= NameMatch; - float IndexProximityScore = - FileProximityMatch ? FileProximityMatch->uriProximity(SymbolURI) : 0; // Proximity scores are [0,1] and we translate them into a multiplier in the - // range from 1 to 2. - Score *= 1 + std::max(IndexProximityScore, SemaProximityScore); + // range from 1 to 3. + Score *= 1 + 2 * std::max(proximityScore(SymbolURI, FileProximityMatch).first, + SemaProximityScore); // Symbols like local variables may only be referenced within their scope. // Conversely if we're in that scope, it's likely we'll reference them. @@ -331,9 +286,9 @@ OS << formatv("\tForbidden: {0}\n", S.Forbidden); OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI); if (S.FileProximityMatch) { - OS << "\tIndex proximity: " - << S.FileProximityMatch->uriProximity(S.SymbolURI) << " (" - << *S.FileProximityMatch << ")\n"; + auto Score = proximityScore(S.SymbolURI, S.FileProximityMatch); + OS << formatv("\tIndex proximity: {0} (distance={1})\n", Score.first, + Score.second); } OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore); OS << formatv("\tQuery type: {0}\n", static_cast(S.Query)); Index: clang-tools-extra/trunk/clangd/XRefs.cpp =================================================================== --- clang-tools-extra/trunk/clangd/XRefs.cpp +++ clang-tools-extra/trunk/clangd/XRefs.cpp @@ -230,13 +230,10 @@ std::vector findDefinitions(ParsedAST &AST, Position Pos, const SymbolIndex *Index) { const SourceManager &SourceMgr = AST.getASTContext().getSourceManager(); - SourceLocation SourceLocationBeg = - getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID()); std::vector Result; // Handle goto definition for #include. - for (auto &Inc : AST.getInclusions()) { - Position Pos = sourceLocToPosition(SourceMgr, SourceLocationBeg); + for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) { if (!Inc.Resolved.empty() && Inc.R.contains(Pos)) Result.push_back(Location{URIForFile{Inc.Resolved}, {}}); } @@ -244,6 +241,8 @@ return Result; // Identified symbols at a specific position. + SourceLocation SourceLocationBeg = + getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID()); auto Symbols = getSymbolAtPosition(AST, SourceLocationBeg); for (auto Item : Symbols.Macros) { Index: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt +++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt @@ -17,6 +17,7 @@ ContextTests.cpp DraftStoreTests.cpp FileIndexTests.cpp + FileDistanceTests.cpp FindSymbolsTests.cpp FuzzyMatchTests.cpp GlobalCompilationDatabaseTests.cpp Index: clang-tools-extra/trunk/unittests/clangd/FileDistanceTests.cpp =================================================================== --- clang-tools-extra/trunk/unittests/clangd/FileDistanceTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/FileDistanceTests.cpp @@ -0,0 +1,93 @@ +//===-- FileDistanceTests.cpp ------------------------*- C++ -*-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "FileDistance.h" +#include "TestFS.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { +namespace { + +TEST(FileDistanceTests, Distance) { + FileDistanceOptions Opts; + Opts.UpCost = 5; + Opts.DownCost = 3; + SourceParams CostTwo; + CostTwo.Cost = 2; + FileDistance D( + {{"tools/clang/lib/Format/FormatToken.cpp", SourceParams()}, + {"tools/clang/include/clang/Format/FormatToken.h", SourceParams()}, + {"include/llvm/ADT/StringRef.h", CostTwo}}, + Opts); + + // Source + EXPECT_EQ(D.distance("tools/clang/lib/Format/FormatToken.cpp"), 0u); + EXPECT_EQ(D.distance("include/llvm/ADT/StringRef.h"), 2u); + // Parent + EXPECT_EQ(D.distance("tools/clang/lib/Format/"), 5u); + // Child + EXPECT_EQ(D.distance("tools/clang/lib/Format/FormatToken.cpp/Oops"), 3u); + // Ancestor (up+up+up+up) + EXPECT_EQ(D.distance("/"), 22u); + // Sibling (up+down) + EXPECT_EQ(D.distance("tools/clang/lib/Format/AnotherFile.cpp"), 8u); + // Cousin (up+up+down+down) + EXPECT_EQ(D.distance("include/llvm/Support/Allocator.h"), 18u); + // First cousin, once removed (up+up+up+down+down) + EXPECT_EQ(D.distance("include/llvm-c/Core.h"), 23u); +} + +TEST(FileDistanceTests, BadSource) { + // We mustn't assume that paths above sources are best reached via them. + FileDistanceOptions Opts; + Opts.UpCost = 5; + Opts.DownCost = 3; + SourceParams CostLots; + CostLots.Cost = 100; + FileDistance D({{"a", SourceParams()}, {"b/b/b", CostLots}}, Opts); + EXPECT_EQ(D.distance("b"), 8u); // a+up+down, not b+up+up + EXPECT_EQ(D.distance("b/b/b"), 14u); // a+up+down+down+down, not b + EXPECT_EQ(D.distance("b/b/b/c"), 17u); // a+up+down+down+down+down, not b+down +} + +auto UseUnittestScheme = UnittestSchemeAnchorSource; + +TEST(FileDistanceTests, URI) { + FileDistanceOptions Opts; + Opts.UpCost = 5; + Opts.DownCost = 3; + SourceParams CostLots; + CostLots.Cost = 1000; + + URIDistance D( + {{testPath("foo"), CostLots}, {"/not/a/testpath", SourceParams()}}, Opts); + EXPECT_EQ(D.distance("file:///not/a/testpath/either"), 3u); + EXPECT_EQ(D.distance("unittest:foo"), 1000u); + EXPECT_EQ(D.distance("unittest:bar"), 1008u); +} + +TEST(FileDistance, LimitUpTraversals) { + FileDistanceOptions Opts; + Opts.UpCost = Opts.DownCost = 1; + SourceParams CheapButLimited, CostLots; + CheapButLimited.MaxUpTraversals = 1; + CostLots.Cost = 100; + + FileDistance D({{"/", CostLots}, {"/a/b/c", CheapButLimited}}, Opts); + EXPECT_EQ(D.distance("/a"), 101u); + EXPECT_EQ(D.distance("/a/z"), 102u); + EXPECT_EQ(D.distance("/a/b"), 1u); + EXPECT_EQ(D.distance("/a/b/z"), 2u); +} + +} // namespace +} // namespace clangd +} // namespace clang Index: clang-tools-extra/trunk/unittests/clangd/HeadersTests.cpp =================================================================== --- clang-tools-extra/trunk/unittests/clangd/HeadersTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/HeadersTests.cpp @@ -64,18 +64,17 @@ } protected: - std::vector collectIncludes() { + IncludeStructure collectIncludes() { auto Clang = setupClang(); PreprocessOnlyAction Action; EXPECT_TRUE( Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])); - std::vector Inclusions; - Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback( - Clang->getSourceManager(), - [&](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); })); + IncludeStructure Includes; + Clang->getPreprocessor().addPPCallbacks( + collectIncludeStructureCallback(Clang->getSourceManager(), &Includes)); EXPECT_TRUE(Action.Execute()); Action.EndSourceFile(); - return Inclusions; + return Includes; } // Calculates the include path, or returns "" on error or header should not be @@ -133,6 +132,14 @@ MATCHER_P(Written, Name, "") { return arg.Written == Name; } MATCHER_P(Resolved, Name, "") { return arg.Resolved == Name; } +MATCHER_P2(Distance, File, D, "") { + if (arg.getKey() != File) + *result_listener << "file =" << arg.getKey().str(); + if (arg.getValue() != D) + *result_listener << "distance =" << arg.getValue(); + return arg.getKey() == File && arg.getValue() == D; +} + TEST_F(HeadersTest, CollectRewrittenAndResolved) { FS.Files[MainFile] = R"cpp( #include "sub/bar.h" // not shortest @@ -140,9 +147,12 @@ std::string BarHeader = testPath("sub/bar.h"); FS.Files[BarHeader] = ""; - EXPECT_THAT(collectIncludes(), + EXPECT_THAT(collectIncludes().MainFileIncludes, UnorderedElementsAre( AllOf(Written("\"sub/bar.h\""), Resolved(BarHeader)))); + EXPECT_THAT(collectIncludes().includeDepth(MainFile), + UnorderedElementsAre(Distance(MainFile, 0u), + Distance(testPath("sub/bar.h"), 1u))); } TEST_F(HeadersTest, OnlyCollectInclusionsInMain) { @@ -156,8 +166,16 @@ #include "bar.h" )cpp"; EXPECT_THAT( - collectIncludes(), + collectIncludes().MainFileIncludes, UnorderedElementsAre(AllOf(Written("\"bar.h\""), Resolved(BarHeader)))); + EXPECT_THAT(collectIncludes().includeDepth(MainFile), + UnorderedElementsAre(Distance(MainFile, 0u), + Distance(testPath("sub/bar.h"), 1u), + Distance(testPath("sub/baz.h"), 2u))); + // includeDepth() also works for non-main files. + EXPECT_THAT(collectIncludes().includeDepth(testPath("sub/bar.h")), + UnorderedElementsAre(Distance(testPath("sub/bar.h"), 0u), + Distance(testPath("sub/baz.h"), 1u))); } TEST_F(HeadersTest, UnResolvedInclusion) { @@ -165,8 +183,10 @@ #include "foo.h" )cpp"; - EXPECT_THAT(collectIncludes(), + EXPECT_THAT(collectIncludes().MainFileIncludes, UnorderedElementsAre(AllOf(Written("\"foo.h\""), Resolved("")))); + EXPECT_THAT(collectIncludes().includeDepth(MainFile), + UnorderedElementsAre(Distance(MainFile, 0u))); } TEST_F(HeadersTest, InsertInclude) { Index: clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp =================================================================== --- clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp @@ -17,6 +17,7 @@ // //===----------------------------------------------------------------------===// +#include "FileDistance.h" #include "Quality.h" #include "TestFS.h" #include "TestTU.h" @@ -162,9 +163,22 @@ PoorNameMatch.NameMatch = 0.2f; EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate()); - SymbolRelevanceSignals WithProximity; - WithProximity.SemaProximityScore = 0.2f; - EXPECT_GT(WithProximity.evaluate(), Default.evaluate()); + SymbolRelevanceSignals WithSemaProximity; + WithSemaProximity.SemaProximityScore = 0.2f; + EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate()); + + SymbolRelevanceSignals IndexProximate; + IndexProximate.SymbolURI = "unittest:/foo/bar.h"; + llvm::StringMap ProxSources; + ProxSources.try_emplace(testPath("foo/baz.h")); + URIDistance Distance(ProxSources); + IndexProximate.FileProximityMatch = &Distance; + EXPECT_GT(IndexProximate.evaluate(), Default.evaluate()); + SymbolRelevanceSignals IndexDistant = IndexProximate; + IndexDistant.SymbolURI = "unittest:/elsewhere/path.h"; + EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate()) + << IndexProximate << IndexDistant; + EXPECT_GT(IndexDistant.evaluate(), Default.evaluate()); SymbolRelevanceSignals Scoped; Scoped.Scope = SymbolRelevanceSignals::FileScope; @@ -185,59 +199,6 @@ EXPECT_LT(sortText(0, "a"), sortText(0, "z")); } -// {a,b,c} becomes /clangd-test/a/b/c -std::string joinPaths(llvm::ArrayRef Parts) { - return testPath( - llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator())); -} - -static constexpr float ProximityBase = 0.7f; - -// Calculates a proximity score for an index symbol with declaration file -// SymPath with the given URI scheme. -float URIProximity(const FileProximityMatcher &Matcher, StringRef SymPath, - StringRef Scheme = "file") { - auto U = URI::create(SymPath, Scheme); - EXPECT_TRUE(static_cast(U)) << llvm::toString(U.takeError()); - return Matcher.uriProximity(U->toString()); -} - -TEST(QualityTests, URIProximityScores) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})}); - - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})), - ProximityBase); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "y", "z"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})), - std::pow(ProximityBase, 2)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})), - std::pow(ProximityBase, 5)); - EXPECT_FLOAT_EQ( - URIProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})), - std::pow(ProximityBase, 6)); - // Note the common directory is /clang-test/ - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})), - std::pow(ProximityBase, 7)); -} - -TEST(QualityTests, URIProximityScoresWithTestURI) { - FileProximityMatcher Matcher( - /*ProximityPaths=*/{joinPaths({"b", "c", "x"})}); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "c", "x"}), "unittest"), - 1); - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "y"}), "unittest"), - std::pow(ProximityBase, 2)); - // unittest:///b/c/x vs unittest:///m/n/y. No common directory. - EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "y"}), "unittest"), - std::pow(ProximityBase, 4)); -} - } // namespace } // namespace clangd } // namespace clang