Index: clangd/CodeComplete.cpp =================================================================== --- clangd/CodeComplete.cpp +++ clangd/CodeComplete.cpp @@ -848,11 +848,17 @@ bool Incomplete = false; // Would more be available with a higher limit? llvm::Optional Filter; // Initialized once Sema runs. std::unique_ptr Includes; // Initialized once compiler runs. + FileProximityMatcher FileProximityMatch; public: // A CodeCompleteFlow object is only useful for calling run() exactly once. CodeCompleteFlow(PathRef FileName, const CodeCompleteOptions &Opts) - : FileName(FileName), Opts(Opts) {} + : FileName(FileName), Opts(Opts), + // FIXME: also use path of the main header corresponding to FileName to + // calculate the file proximity, which would capture include/ and src/ + // project setup where headers and implementations are not in the same + // directory. + FileProximityMatch({FileName}) {} CompletionList run(const SemaCompleteInput &SemaCCInput) && { trace::Span Tracer("CodeCompleteFlow"); @@ -993,6 +999,7 @@ SymbolQualitySignals Quality; SymbolRelevanceSignals Relevance; Relevance.Query = SymbolRelevanceSignals::CodeComplete; + Relevance.FileProximityMatch = &FileProximityMatch; if (auto FuzzyScore = fuzzyScore(C)) Relevance.NameMatch = *FuzzyScore; else Index: clangd/Quality.h =================================================================== --- clangd/Quality.h +++ clangd/Quality.h @@ -26,6 +26,7 @@ //===---------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include #include @@ -67,13 +68,37 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolQualitySignals &); +class FileProximityMatcher { + public: + /// \p ProximityPaths are used to compute proximity scores from symbol's + /// declaring file. The best score will be used. + explicit FileProximityMatcher( + llvm::ArrayRef ProximityPaths); + + /// Calculates the best proximity score from proximity paths to the symbol's + /// URI. Score is [0-1], 1 means \p SymbolURI exactly matches a proximity + /// path. When a path cannot be encoded into the same scheme as \p + /// SymbolURI, the proximity will be 0. + float uriProximity(llvm::StringRef SymbolURI) const; + + private: + llvm::SmallVector ProximityPaths; + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, + const FileProximityMatcher &); +}; + /// Attributes of a symbol-query pair that affect how much we like it. struct SymbolRelevanceSignals { /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. float NameMatch = 1; bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). + + const FileProximityMatcher *FileProximityMatch = nullptr; + /// This is used to calculate proximity between the index symbol and the + /// query. + llvm::StringRef IndexSymbolURI; /// Proximity between best declaration and the query. [0-1], 1 is closest. - float ProximityScore = 0; + float SemaProximityScore = 0; // An approximate measure of where we expect the symbol to be used. enum AccessibleScope { Index: clangd/Quality.cpp =================================================================== --- clangd/Quality.cpp +++ clangd/Quality.cpp @@ -7,6 +7,7 @@ // //===---------------------------------------------------------------------===// #include "Quality.h" +#include "URI.h" #include "index/Index.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/CharInfo.h" @@ -162,6 +163,60 @@ return OS; } +/// Calculates a proximity score from \p From and \p To, which are URI strings +/// that have the same scheme. This does not parse URI. A URI (sans ":") +/// is split into chunks by '/' and each chunk is considered a file/directory. +/// For example, "uri:///a/b/c" will be treated as /a/b/c +static float uriProximity(StringRef From, StringRef To) { + auto SchemeSplitFrom = From.split(':'); + auto SchemeSplitTo = To.split(':'); + assert((SchemeSplitFrom.first == SchemeSplitTo.first) && + "URIs must have the same scheme in order to compute proximity."); + auto Split = [](StringRef URIWithoutScheme) { + SmallVector Split; + URIWithoutScheme.split(Split, '/', /*MaxSplit=*/-1, /*KeepEmpty=*/false); + return Split; + }; + SmallVector Fs = Split(SchemeSplitFrom.second); + SmallVector Ts = Split(SchemeSplitTo.second); + auto F = Fs.begin(), T = Ts.begin(), FE = Fs.end(), TE = Ts.end(); + for (; F != FE && T != TE && *F == *T; ++F, ++T) { + } + // We penalize for traversing up and down from \p From to \p To but penalize + // less for traversing down because subprojects are more closely related than + // superprojects. + int UpDist = FE - F; + int DownDist = TE - T; + return std::pow(0.7, UpDist + DownDist/2); +} + +FileProximityMatcher::FileProximityMatcher(ArrayRef ProximityPaths) + : ProximityPaths(ProximityPaths.begin(), ProximityPaths.end()) {} + +float FileProximityMatcher::uriProximity(StringRef SymbolURI) const { + float Score = 0; + if (!ProximityPaths.empty() && !SymbolURI.empty()) { + for (const auto &Path : ProximityPaths) + // Only calculate proximity score for two URIs with the same scheme so + // that the computation can be purely text-based and thus avoid expensive + // URI encoding/decoding. + if (auto U = URI::create(Path, SymbolURI.split(':').first)) { + Score = std::max(Score, clangd::uriProximity(U->toString(), SymbolURI)); + } else { + llvm::consumeError(U.takeError()); + } + } + return Score; +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const FileProximityMatcher &M) { + OS << formatv("File proximity matcher: "); + OS << formatv("ProximityPaths{{0}}", llvm::join(M.ProximityPaths.begin(), + M.ProximityPaths.end(), ",")); + return OS; +} + static SymbolRelevanceSignals::AccessibleScope ComputeScope(const NamedDecl &D) { bool InClass = false; @@ -182,6 +237,8 @@ void SymbolRelevanceSignals::merge(const Symbol &IndexResult) { // FIXME: Index results always assumed to be at global scope. If Scope becomes // relevant to non-completion requests, we should recognize class members etc. + + IndexSymbolURI = IndexResult.CanonicalDeclaration.FileURI; } void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) { @@ -190,11 +247,12 @@ Forbidden = true; if (SemaCCResult.Declaration) { - // We boost things that have decls in the main file. - // The real proximity scores would be more general when we have them. + // We boost things that have decls in the main file. We give a fixed score + // for all other declarations in sema as they are already included in the + // translation unit. float DeclProximity = - hasDeclInMainFile(*SemaCCResult.Declaration) ? 1.0 : 0.0; - ProximityScore = std::max(DeclProximity, ProximityScore); + hasDeclInMainFile(*SemaCCResult.Declaration) ? 1.0 : 0.6; + SemaProximityScore = std::max(DeclProximity, SemaProximityScore); } // Declarations are scoped, others (like macros) are assumed global. @@ -210,9 +268,11 @@ Score *= NameMatch; + float IndexProximityScore = + FileProximityMatch ? FileProximityMatch->uriProximity(IndexSymbolURI) : 0; // Proximity scores are [0,1] and we translate them into a multiplier in the // range from 1 to 2. - Score *= 1 + ProximityScore; + Score *= 1 + std::max(IndexProximityScore, SemaProximityScore); // Symbols like local variables may only be referenced within their scope. // Conversely if we're in that scope, it's likely we'll reference them. @@ -236,11 +296,18 @@ return Score; } + raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) { OS << formatv("=== Symbol relevance: {0}\n", S.evaluate()); OS << formatv("\tName match: {0}\n", S.NameMatch); OS << formatv("\tForbidden: {0}\n", S.Forbidden); - OS << formatv("\tProximity: {0}\n", S.ProximityScore); + OS << formatv("\tIndexSymbolURI: {0}\n", S.IndexSymbolURI); + if (S.FileProximityMatch) { + OS << "\t" << *S.FileProximityMatch << "\n"; + OS << formatv("\tIndexProximity: {0}\n", + S.FileProximityMatch->uriProximity(S.IndexSymbolURI)); + } + OS << formatv("\tSemaProximity: {0}\n", S.SemaProximityScore); OS << formatv("\tQuery type: {0}\n", static_cast(S.Query)); OS << formatv("\tScope: {0}\n", static_cast(S.Scope)); return OS; Index: unittests/clangd/QualityTests.cpp =================================================================== --- unittests/clangd/QualityTests.cpp +++ unittests/clangd/QualityTests.cpp @@ -18,12 +18,18 @@ //===----------------------------------------------------------------------===// #include "Quality.h" +#include "TestFS.h" #include "TestTU.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace clang { namespace clangd { + +// Force the unittest URI scheme to be linked, +static int LLVM_ATTRIBUTE_UNUSED UnittestSchemeAnchorDest = + UnittestSchemeAnchorSource; + namespace { TEST(QualityTests, SymbolQualitySignalExtraction) { @@ -87,13 +93,14 @@ Relevance = {}; Relevance.merge(CodeCompletionResult(&findDecl(AST, "main"), 42)); - EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1.0) << "Decl in current file"; + EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0) << "Decl in current file"; Relevance = {}; Relevance.merge(CodeCompletionResult(&findDecl(AST, "header"), 42)); - EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.0) << "Decl from header"; + EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 0.6) << "Decl from header"; Relevance = {}; Relevance.merge(CodeCompletionResult(&findDecl(AST, "header_main"), 42)); - EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1.0) << "Current file and header"; + EXPECT_FLOAT_EQ(Relevance.SemaProximityScore, 1.0) + << "Current file and header"; Relevance = {}; Relevance.merge(CodeCompletionResult(&findAnyDecl(AST, "X"), 42)); @@ -145,7 +152,7 @@ EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate()); SymbolRelevanceSignals WithProximity; - WithProximity.ProximityScore = 0.2f; + WithProximity.SemaProximityScore = 0.2f; EXPECT_GT(WithProximity.evaluate(), Default.evaluate()); SymbolRelevanceSignals Scoped; @@ -167,6 +174,59 @@ EXPECT_LT(sortText(0, "a"), sortText(0, "z")); } +// {a,b,c} becomes /clangd-test/a/b/c +std::string joinPaths(llvm::ArrayRef Parts) { + return testPath( + llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator())); +} + +static constexpr float ProximityBase = 0.7; + +// Calculates a proximity score for an index symbol with declaration file +// SymPath with the given URI scheme. +float URIProximity(const FileProximityMatcher &Matcher, StringRef SymPath, + StringRef Scheme = "file") { + auto U = URI::create(SymPath, Scheme); + EXPECT_TRUE(static_cast(U)) << llvm::toString(U.takeError()); + return Matcher.uriProximity(U->toString()); +} + +TEST(QualityTests, URIProximityScores) { + FileProximityMatcher Matcher( + /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})}); + + EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})), + 1); + EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})), + ProximityBase); + EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "y", "z"})), + std::pow(ProximityBase, 5)); + EXPECT_FLOAT_EQ( + URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})), + std::pow(ProximityBase, 2)); + EXPECT_FLOAT_EQ( + URIProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})), + std::pow(ProximityBase, 5)); + EXPECT_FLOAT_EQ( + URIProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})), + std::pow(ProximityBase, 6)); + // Note the common directory is /clang-test/ + EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})), + std::pow(ProximityBase, 7)); +} + +TEST(QualityTests, URIProximityScoresWithTestURI) { + FileProximityMatcher Matcher( + /*ProximityPaths=*/{joinPaths({"b", "c", "x"})}); + EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "c", "x"}), "unittest"), + 1); + EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "y"}), "unittest"), + std::pow(ProximityBase, 2)); + // unittest:///b/c/x vs unittest:///m/n/y. No common directory. + EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "y"}), "unittest"), + std::pow(ProximityBase, 4)); +} + } // namespace } // namespace clangd } // namespace clang Index: unittests/clangd/SymbolCollectorTests.cpp =================================================================== --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -372,17 +372,15 @@ UnorderedElementsAre(AllOf(QName("Foo"), DeclURI(TestHeaderURI)))); } -#ifndef _WIN32 TEST_F(SymbolCollectorTest, CustomURIScheme) { // Use test URI scheme from URITests.cpp CollectorOpts.URISchemes.insert(CollectorOpts.URISchemes.begin(), "unittest"); - TestHeaderName = testPath("test-root/x.h"); - TestFileName = testPath("test-root/x.cpp"); + TestHeaderName = testPath("x.h"); + TestFileName = testPath("x.cpp"); runSymbolCollector("class Foo {};", /*Main=*/""); - EXPECT_THAT(Symbols, - UnorderedElementsAre(AllOf(QName("Foo"), DeclURI("unittest:x.h")))); + EXPECT_THAT(Symbols, UnorderedElementsAre( + AllOf(QName("Foo"), DeclURI("unittest:///x.h")))); } -#endif TEST_F(SymbolCollectorTest, InvalidURIScheme) { // Use test URI scheme from URITests.cpp Index: unittests/clangd/TestFS.h =================================================================== --- unittests/clangd/TestFS.h +++ unittests/clangd/TestFS.h @@ -56,6 +56,10 @@ // Returns a suitable absolute path for this OS. std::string testPath(PathRef File); +// This anchor is used to force the linker to link in the generated object file +// and thus register unittest: URI scheme plugin. +extern volatile int UnittestSchemeAnchorSource; + } // namespace clangd } // namespace clang #endif Index: unittests/clangd/TestFS.cpp =================================================================== --- unittests/clangd/TestFS.cpp +++ unittests/clangd/TestFS.cpp @@ -7,7 +7,11 @@ // //===----------------------------------------------------------------------===// #include "TestFS.h" +#include "URI.h" +#include "clang/AST/DeclCXX.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/Path.h" #include "gtest/gtest.h" namespace clang { @@ -62,5 +66,38 @@ return Path.str(); } +/// unittest: is a scheme that refers to files relative to testRoot() +class TestScheme : public URIScheme { +public: + static const char *Scheme; + + llvm::Expected + getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body, + llvm::StringRef HintPath) const override { + assert(HintPath.startswith(testRoot())); + llvm::SmallString<16> Path(Body.begin(), Body.end()); + llvm::sys::path::native(Path); + return testPath(Path); + } + + llvm::Expected + uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { + llvm::StringRef Body = AbsolutePath; + if (!Body.consume_front(testRoot())) + return llvm::make_error( + AbsolutePath + "does not start with " + testRoot(), + llvm::inconvertibleErrorCode()); + + return URI(Scheme, /*Authority=*/"", + llvm::sys::path::convert_to_slash(Body)); + } +}; + +const char *TestScheme::Scheme = "unittest"; + +static URISchemeRegistry::Add X(TestScheme::Scheme, "Test schema"); + +volatile int UnittestSchemeAnchorSource = 0; + } // namespace clangd } // namespace clang Index: unittests/clangd/URITests.cpp =================================================================== --- unittests/clangd/URITests.cpp +++ unittests/clangd/URITests.cpp @@ -14,6 +14,11 @@ namespace clang { namespace clangd { + +// Force the unittest URI scheme to be linked, +static int LLVM_ATTRIBUTE_UNUSED UnittestSchemeAnchorDest = + UnittestSchemeAnchorSource; + namespace { using ::testing::AllOf; @@ -22,38 +27,6 @@ MATCHER_P(Authority, A, "") { return arg.authority() == A; } MATCHER_P(Body, B, "") { return arg.body() == B; } -// Assume all files in the schema have a "test-root/" root directory, and the -// schema path is the relative path to the root directory. -// So the schema of "/some-dir/test-root/x/y/z" is "test:x/y/z". -class TestScheme : public URIScheme { -public: - static const char *Scheme; - - static const char *TestRoot; - - llvm::Expected - getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body, - llvm::StringRef HintPath) const override { - auto Pos = HintPath.find(TestRoot); - assert(Pos != llvm::StringRef::npos); - return (HintPath.substr(0, Pos + llvm::StringRef(TestRoot).size()) + Body) - .str(); - } - - llvm::Expected - uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { - auto Pos = AbsolutePath.find(TestRoot); - assert(Pos != llvm::StringRef::npos); - return URI(Scheme, /*Authority=*/"", - AbsolutePath.substr(Pos + llvm::StringRef(TestRoot).size())); - } -}; - -const char *TestScheme::Scheme = "unittest"; -const char *TestScheme::TestRoot = "/test-root/"; - -static URISchemeRegistry::Add X(TestScheme::Scheme, "Test schema"); - std::string createOrDie(llvm::StringRef AbsolutePath, llvm::StringRef Scheme = "file") { auto Uri = URI::create(AbsolutePath, Scheme); @@ -167,12 +140,12 @@ #else EXPECT_EQ(resolveOrDie(parseOrDie("file:/a/b/c")), "/a/b/c"); EXPECT_EQ(resolveOrDie(parseOrDie("file://auth/a/b/c")), "/a/b/c"); - EXPECT_EQ(resolveOrDie(parseOrDie("unittest:a/b/c"), "/dir/test-root/x/y/z"), - "/dir/test-root/a/b/c"); EXPECT_THAT(resolveOrDie(parseOrDie("file://au%3dth/%28x%29/y/%20z")), "/(x)/y/ z"); EXPECT_THAT(resolveOrDie(parseOrDie("file:///c:/x/y/z")), "c:/x/y/z"); #endif + EXPECT_EQ(resolveOrDie(parseOrDie("unittest:a"), testPath("x")), + testPath("a")); } TEST(URITest, Platform) {