Index: clangd/CodeComplete.cpp =================================================================== --- clangd/CodeComplete.cpp +++ clangd/CodeComplete.cpp @@ -999,7 +999,10 @@ return; if (IndexResult) { Quality.merge(*IndexResult); - Relevance.merge(*IndexResult); + // FIXME: also use the main header path to calculate the file proximity, + // which would capture include/ and src/ project setup where headers and + // implementations are not in the same directory. + Relevance.merge(*IndexResult, /*ProximityPaths=*/{FileName}); } if (SemaResult) { Quality.merge(*SemaResult); Index: clangd/Quality.h =================================================================== --- clangd/Quality.h +++ clangd/Quality.h @@ -89,7 +89,10 @@ } Query = Generic; void merge(const CodeCompletionResult &SemaResult); - void merge(const Symbol &IndexResult); + // If \p ProximityPaths are set, they are used to compute proximity scores + // from symbol's declaring file. The best score will be used. + void merge(const Symbol &IndexResult, + llvm::SmallVector ProximityPaths = {}); // Condense these signals down to a single number, higher is better. float evaluate() const; Index: clangd/Quality.cpp =================================================================== --- clangd/Quality.cpp +++ clangd/Quality.cpp @@ -7,6 +7,7 @@ // //===---------------------------------------------------------------------===// #include "Quality.h" +#include "URI.h" #include "index/Index.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/CharInfo.h" @@ -179,9 +180,57 @@ return SymbolRelevanceSignals::GlobalScope; } -void SymbolRelevanceSignals::merge(const Symbol &IndexResult) { +/// Calculates a proximity score between two URI strings that have the same +/// scheme. This does not parse URI. A URI (sans ":") is split into +/// chunks by '/' and each chunk is considered a file/directory. For example, +/// "uri:///a/b/c" will be treated as /a/b/c +static float uriProximity(StringRef UX, StringRef UY) { + auto SchemeSplitX = UX.split(':'); + auto SchemeSplitY = UY.split(':'); + assert((SchemeSplitX.first == SchemeSplitY.first) && + "URIs must have the same scheme in order to compute proximity."); + auto Split = [](StringRef URIWithoutScheme) { + SmallVector Split; + URIWithoutScheme.split(Split, '/', /*MaxSplit=*/-1, /*KeepEmpty=*/false); + return Split; + }; + SmallVector Xs = Split(SchemeSplitX.second); + SmallVector Ys = Split(SchemeSplitY.second); + auto X = Xs.begin(), Y = Ys.begin(), XE = Xs.end(), YE = Ys.end(); + for (; X != XE && Y != YE && *X == *Y; ++X, ++Y) { + } + auto Dist = (XE - X) + (YE - Y); + // Either UX and UY are in the same directory or one is in an ancestor + // directory of the other. + // 1.0 same file, 0.9 in the same directory; -0.05 for each directory away. + if (X >= (XE - 1) || Y >= (YE - 1)) + return 1.0 - Dist * 0.05; + if (X == Xs.begin() && Y == Ys.begin()) // No common directory. + return 0; + return std::max(0.0, 1.0 - 0.1 * Dist); +} + +void SymbolRelevanceSignals::merge( + const Symbol &IndexResult, + llvm::SmallVector ProximityPaths) { // FIXME: Index results always assumed to be at global scope. If Scope becomes // relevant to non-completion requests, we should recognize class members etc. + + StringRef SymURI = IndexResult.CanonicalDeclaration.FileURI; + float IndexProximity = 0; + if (!ProximityPaths.empty() && !SymURI.empty()) { + for (const auto &Path : ProximityPaths) + // Only calculate proximity score for two URIs with the same scheme so + // that the computation can be purely text-based and thus avoid expensive + // URI encoding/decoding. + if (auto U = URI::create(Path, SymURI.split(':').first)) { + IndexProximity = + std::max(IndexProximity, uriProximity(SymURI, U->toString())); + } else { + llvm::consumeError(U.takeError()); + } + } + ProximityScore = std::max(IndexProximity, ProximityScore); } void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) { @@ -190,10 +239,11 @@ Forbidden = true; if (SemaCCResult.Declaration) { - // We boost things that have decls in the main file. - // The real proximity scores would be more general when we have them. + // We boost things that have decls in the main file. We give a fixed score + // for all other declarations in sema as they are already included in the + // translation unit. float DeclProximity = - hasDeclInMainFile(*SemaCCResult.Declaration) ? 1.0 : 0.0; + hasDeclInMainFile(*SemaCCResult.Declaration) ? 1.0 : 0.6; ProximityScore = std::max(DeclProximity, ProximityScore); } Index: unittests/clangd/QualityTests.cpp =================================================================== --- unittests/clangd/QualityTests.cpp +++ unittests/clangd/QualityTests.cpp @@ -18,12 +18,19 @@ //===----------------------------------------------------------------------===// #include "Quality.h" +#include "TestFS.h" #include "TestTU.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace clang { namespace clangd { + +// Force the unittest URI scheme to be linked, +extern volatile int UnittestSchemeAnchorSource; +static int LLVM_ATTRIBUTE_UNUSED UnittestSchemeAnchorDest = + UnittestSchemeAnchorSource; + namespace { TEST(QualityTests, SymbolQualitySignalExtraction) { @@ -90,7 +97,7 @@ EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1.0) << "Decl in current file"; Relevance = {}; Relevance.merge(CodeCompletionResult(&findDecl(AST, "header"), 42)); - EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.0) << "Decl from header"; + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.6) << "Decl from header"; Relevance = {}; Relevance.merge(CodeCompletionResult(&findDecl(AST, "header_main"), 42)); EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1.0) << "Current file and header"; @@ -167,6 +174,113 @@ EXPECT_LT(sortText(0, "a"), sortText(0, "z")); } +// {a,b,c} becomes /clangd-test/a/b/c +std::string joinPaths(llvm::ArrayRef Parts) { + return testPath( + llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator())); +} + +Symbol symWithDeclURI(StringRef Path, std::string &Storage, + StringRef Scheme = "file") { + Symbol Sym; + auto U = URI::create(Path, Scheme); + EXPECT_TRUE(static_cast(U)) << llvm::toString(U.takeError()); + Storage = U->toString(); + Sym.CanonicalDeclaration.FileURI = Storage; + return Sym; +} + +TEST(QualityTests, IndexSymbolProximityScores) { + std::string Path = joinPaths({"a", "b", "c", "x"}); + + std::string Storage; + { + SymbolRelevanceSignals Relevance; + Relevance.merge(symWithDeclURI(joinPaths({"a", "b", "c", "x"}), Storage), + {Path}); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1.0); + } + { + SymbolRelevanceSignals Relevance; + Relevance.merge(symWithDeclURI(joinPaths({"a", "b", "c", "y"}), Storage), + {Path}); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.9); + } + { + SymbolRelevanceSignals Relevance; + Relevance.merge(symWithDeclURI(joinPaths({"a", "y"}), Storage), {Path}); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.80); + } + { + SymbolRelevanceSignals Relevance; + Relevance.merge( + symWithDeclURI(joinPaths({"a", "b", "c", "d", "y"}), Storage), {Path}); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.85); + } + { + SymbolRelevanceSignals Relevance; + Relevance.merge( + symWithDeclURI(joinPaths({"a", "b", "m", "n", "o", "y"}), Storage), + {Path}); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.4); + } + { + SymbolRelevanceSignals Relevance; + Relevance.merge( + symWithDeclURI(joinPaths({"a", "t", "m", "n", "o", "y"}), Storage), + {Path}); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.2); + } + { + // Note the common directory is /clang-test/ + SymbolRelevanceSignals Relevance; + Relevance.merge(symWithDeclURI(joinPaths({"m", "n", "o", "y"}), Storage), + {Path}); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.2); + } + { + // Different URI schemes. + SymbolRelevanceSignals Relevance; + Relevance.merge(symWithDeclURI(joinPaths({"a", "b", "test-root", "a", "y"}), + Storage, + /*Scheme=*/"unittest"), + {Path}); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0); + } +} + +TEST(QualityTests, IndexSymbolProximityScoresWithTestURI) { + std::string Path = joinPaths({"a", "test-root", "b", "c", "x"}); + std::string Storage; + { + SymbolRelevanceSignals Relevance; + Relevance.merge( + symWithDeclURI(joinPaths({"remote", "test-root", "b", "c", "x"}), + Storage, + /*Scheme=*/"unittest"), + {Path}); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1); + } + { + SymbolRelevanceSignals Relevance; + Relevance.merge(symWithDeclURI(joinPaths({"remote", "test-root", "y"}), + Storage, + /*Scheme=*/"unittest"), + {Path}); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.8); + } + { + SymbolRelevanceSignals Relevance; + // unittest:///b/c/x vs unittest:///m/n/y. No common directory. + Relevance.merge( + symWithDeclURI(joinPaths({"remote", "test-root", "m", "n", "y"}), + Storage, + /*Scheme=*/"unittest"), + {Path}); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.0); + } +} + } // namespace } // namespace clangd } // namespace clang Index: unittests/clangd/TestFS.cpp =================================================================== --- unittests/clangd/TestFS.cpp +++ unittests/clangd/TestFS.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// #include "TestFS.h" +#include "URI.h" #include "llvm/Support/Errc.h" #include "gtest/gtest.h" @@ -62,5 +63,50 @@ return Path.str(); } +// Assume all files in the schema have a "test-root/" root directory, and the +// schema path is the relative path to the root directory. +// So the schema of "/some-dir/test-root/x/y/z" is "test:x/y/z". +class TestScheme : public URIScheme { +public: + static const char *Scheme; + + static const char *TestRoot; + + llvm::Expected + getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body, + llvm::StringRef HintPath) const override { + auto Pos = HintPath.find(TestRoot); + assert(Pos != llvm::StringRef::npos); + return (HintPath.substr(0, Pos + llvm::StringRef(TestRoot).size()) + Body) + .str(); + } + + llvm::Expected + uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { + auto Pos = AbsolutePath.find(TestRoot); + if (Pos == llvm::StringRef::npos) + return llvm::make_error( + llvm::Twine("Directory ") + TestRoot + " not found in path " + + AbsolutePath, + llvm::inconvertibleErrorCode()); + + return URI(Scheme, /*Authority=*/"", + AbsolutePath.substr(Pos + llvm::StringRef(TestRoot).size())); + } +}; + +const char *TestScheme::Scheme = "unittest"; +#ifdef _WIN32 +const char *TestScheme::TestRoot = "\\test-root\\"; +#else +const char *TestScheme::TestRoot = "/test-root/"; +#endif + +static URISchemeRegistry::Add X(TestScheme::Scheme, "Test schema"); + +// This anchor is used to force the linker to link in the generated object file +// and thus register the plugin. +volatile int UnittestSchemeAnchorSource = 0; + } // namespace clangd } // namespace clang Index: unittests/clangd/URITests.cpp =================================================================== --- unittests/clangd/URITests.cpp +++ unittests/clangd/URITests.cpp @@ -14,6 +14,12 @@ namespace clang { namespace clangd { + +// Force the unittest URI scheme to be linked, +extern volatile int UnittestSchemeAnchorSource; +static int LLVM_ATTRIBUTE_UNUSED UnittestSchemeAnchorDest = + UnittestSchemeAnchorSource; + namespace { using ::testing::AllOf; @@ -22,38 +28,6 @@ MATCHER_P(Authority, A, "") { return arg.authority() == A; } MATCHER_P(Body, B, "") { return arg.body() == B; } -// Assume all files in the schema have a "test-root/" root directory, and the -// schema path is the relative path to the root directory. -// So the schema of "/some-dir/test-root/x/y/z" is "test:x/y/z". -class TestScheme : public URIScheme { -public: - static const char *Scheme; - - static const char *TestRoot; - - llvm::Expected - getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body, - llvm::StringRef HintPath) const override { - auto Pos = HintPath.find(TestRoot); - assert(Pos != llvm::StringRef::npos); - return (HintPath.substr(0, Pos + llvm::StringRef(TestRoot).size()) + Body) - .str(); - } - - llvm::Expected - uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { - auto Pos = AbsolutePath.find(TestRoot); - assert(Pos != llvm::StringRef::npos); - return URI(Scheme, /*Authority=*/"", - AbsolutePath.substr(Pos + llvm::StringRef(TestRoot).size())); - } -}; - -const char *TestScheme::Scheme = "unittest"; -const char *TestScheme::TestRoot = "/test-root/"; - -static URISchemeRegistry::Add X(TestScheme::Scheme, "Test schema"); - std::string createOrDie(llvm::StringRef AbsolutePath, llvm::StringRef Scheme = "file") { auto Uri = URI::create(AbsolutePath, Scheme);