Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -28,6 +28,7 @@ Logger.cpp Protocol.cpp ProtocolHandlers.cpp + Quality.cpp SourceCode.cpp Threading.cpp Trace.cpp Index: clangd/CodeComplete.cpp =================================================================== --- clangd/CodeComplete.cpp +++ clangd/CodeComplete.cpp @@ -19,6 +19,7 @@ #include "Compiler.h" #include "FuzzyMatch.h" #include "Logger.h" +#include "Quality.h" #include "SourceCode.h" #include "Trace.h" #include "index/Index.h" @@ -32,6 +33,9 @@ #include "llvm/Support/Format.h" #include +// We log detailed candidate here if you run with -debug-only=codecomplete. +#define DEBUG_TYPE "codecomplete" + namespace clang { namespace clangd { namespace { @@ -190,35 +194,6 @@ return Result; } -// Produces an integer that sorts in the same order as F. -// That is: a < b <==> encodeFloat(a) < encodeFloat(b). -uint32_t encodeFloat(float F) { - static_assert(std::numeric_limits::is_iec559, ""); - static_assert(sizeof(float) == sizeof(uint32_t), ""); - constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); - - // Get the bits of the float. Endianness is the same as for integers. - uint32_t U; - memcpy(&U, &F, sizeof(float)); - // IEEE 754 floats compare like sign-magnitude integers. - if (U & TopBit) // Negative float. - return 0 - U; // Map onto the low half of integers, order reversed. - return U + TopBit; // Positive floats map onto the high half of integers. -} - -// Returns a string that sorts in the same order as (-Score, Name), for LSP. -std::string sortText(float Score, llvm::StringRef Name) { - // We convert -Score to an integer, and hex-encode for readability. - // Example: [0.5, "foo"] -> "41000000foo" - std::string S; - llvm::raw_string_ostream OS(S); - write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower, - /*Width=*/2 * sizeof(Score)); - OS << Name; - OS.flush(); - return S; -} - /// A code completion result, in clang-native form. /// It may be promoted to a CompletionItem if it's among the top-ranked results. struct CompletionCandidate { @@ -227,33 +202,6 @@ const CodeCompletionResult *SemaResult = nullptr; const Symbol *IndexResult = nullptr; - // Computes the "symbol quality" score for this completion. Higher is better. - float score() const { - float Score = 1; - if (IndexResult) - Score *= quality(*IndexResult); - if (SemaResult) { - // For now we just use the Sema priority, mapping it onto a 0-2 interval. - // That makes 1 neutral-ish, so we don't reward/penalize non-Sema results. - // Priority 80 is a really bad score. - Score *= 2 - std::min(80, SemaResult->Priority) / 40; - - switch (static_cast(SemaResult->Availability)) { - case CXAvailability_Available: - // No penalty. - break; - case CXAvailability_Deprecated: - Score *= 0.1f; - break; - case CXAvailability_NotAccessible: - case CXAvailability_NotAvailable: - Score = 0; - break; - } - } - return Score; - } - // Builds an LSP completion item. CompletionItem build(llvm::StringRef FileName, const CompletionItemScores &Scores, @@ -319,6 +267,7 @@ return I; } }; +using ScoredCandidate = std::pair; // Determine the symbol ID for a Sema code completion result, if possible. llvm::Optional getSymbolID(const CodeCompletionResult &R) { @@ -525,50 +474,12 @@ UniqueFunction ResultsCallback; }; -// Tracks a bounded number of candidates with the best scores. -class TopN { -public: - using value_type = std::pair; - static constexpr size_t Unbounded = std::numeric_limits::max(); - - TopN(size_t N) : N(N) {} - - // Adds a candidate to the set. - // Returns true if a candidate was dropped to get back under N. - bool push(value_type &&V) { - bool Dropped = false; - if (Heap.size() >= N) { - Dropped = true; - if (N > 0 && greater(V, Heap.front())) { - std::pop_heap(Heap.begin(), Heap.end(), greater); - Heap.back() = std::move(V); - std::push_heap(Heap.begin(), Heap.end(), greater); - } - } else { - Heap.push_back(std::move(V)); - std::push_heap(Heap.begin(), Heap.end(), greater); - } - assert(Heap.size() <= N); - assert(std::is_heap(Heap.begin(), Heap.end(), greater)); - return Dropped; - } - - // Returns candidates from best to worst. - std::vector items() && { - std::sort_heap(Heap.begin(), Heap.end(), greater); - assert(Heap.size() <= N); - return std::move(Heap); - } - -private: - static bool greater(const value_type &L, const value_type &R) { +struct ScoredCandidateGreater { + bool operator()(const ScoredCandidate &L, const ScoredCandidate &R) { if (L.second.finalScore != R.second.finalScore) return L.second.finalScore > R.second.finalScore; return L.first.Name < R.first.Name; // Earlier name is better. } - - const size_t N; - std::vector Heap; // Min-heap, comparator is greater(). }; class SignatureHelpCollector final : public CodeCompleteConsumer { @@ -976,7 +887,8 @@ const SymbolSlab &IndexResults) { trace::Span Tracer("Merge and score results"); // We only keep the best N results at any time, in "native" format. - TopN Top(Opts.Limit == 0 ? TopN::Unbounded : Opts.Limit); + TopN Top( + Opts.Limit == 0 ? std::numeric_limits::max() : Opts.Limit); llvm::DenseSet UsedIndexResults; auto CorrespondingIndexResult = [&](const CodeCompletionResult &SemaResult) -> const Symbol * { @@ -1002,23 +914,42 @@ } // Scores a candidate and adds it to the TopN structure. - void addCandidate(TopN &Candidates, const CodeCompletionResult *SemaResult, + void addCandidate(TopN &Candidates, + const CodeCompletionResult *SemaResult, const Symbol *IndexResult) { CompletionCandidate C; C.SemaResult = SemaResult; C.IndexResult = IndexResult; C.Name = IndexResult ? IndexResult->Name : Recorder->getName(*SemaResult); - CompletionItemScores Scores; + SymbolQualitySignals Quality; + SymbolRelevanceSignals Relevance; if (auto FuzzyScore = Filter->match(C.Name)) - Scores.filterScore = *FuzzyScore; + Relevance.NameMatch = *FuzzyScore; else return; - Scores.symbolScore = C.score(); - // We score candidates by multiplying symbolScore ("quality" of the result) - // with filterScore (how well it matched the query). - // This is sensitive to the distribution of both component scores! - Scores.finalScore = Scores.filterScore * Scores.symbolScore; + if (IndexResult) + Quality.merge(*IndexResult); + if (SemaResult) { + Quality.merge(*SemaResult); + Relevance.merge(*SemaResult); + } + + float QualScore = Quality.evaluate(), RelScore = Relevance.evaluate(); + CompletionItemScores Scores; + Scores.finalScore = evaluateSymbolAndRelevance(QualScore, RelScore); + // The purpose of exporting component scores is to allow NameMatch to be + // replaced on the client-side. So we export (NameMatch, final/NameMatch) + // rather than (RelScore, QualScore). + Scores.filterScore = Relevance.NameMatch; + Scores.symbolScore = + Scores.filterScore ? Scores.finalScore / Scores.filterScore : QualScore; + + DEBUG(llvm::dbgs() << "CodeComplete: " << C.Name + << (IndexResult ? " (index)" : "") + << (SemaResult ? " (sema)" : "") << " = " + << Scores.finalScore << "\n" + << Quality << Relevance << "\n"); NSema += bool(SemaResult); NIndex += bool(IndexResult); Index: clangd/Quality.h =================================================================== --- /dev/null +++ clangd/Quality.h @@ -0,0 +1,126 @@ +//===--- Quality.h - Ranking alternatives for ambiguous queries -*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +/// +/// Some operations such as code completion produce a set of candidates. +/// Usually the user can choose between them, but we should put the best options +/// at the top (they're easier to select, and more likely to be seen). +/// +/// This file defines building blocks for ranking candidates. +/// It's used by the features directly and also in the implementation of +/// indexes, as indexes also need to heuristically limit their results. +/// +/// The facilities here are: +/// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString +/// These are structured in a way that they can be debugged, and are fairly +/// consistent regardless of the source. +/// - compute scores from scoring signals. These are suitable for sorting. +/// - sorting utilities like the TopN container. +/// These could be split up further to isolate dependencies if we care. +/// +//===---------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H +#include "llvm/ADT/StringRef.h" +#include +#include +#include +namespace llvm { +class raw_ostream; +} +namespace clang { +class CodeCompletionResult; +namespace clangd { +struct Symbol; + +// Signals structs are designed to be aggregated from 0 or more sources. +// A default instance has neutral signals, and sources are merged into it. +// They can be dumped for debugging, and evaluate()d into a score. + +/// Attributes of a symbol that affect how much we like it. +struct SymbolQualitySignals { + unsigned SemaCCPriority = 0; // 1-80, 1 is best. 0 means absent. + // FIXME: this is actually a mix of symbol + // quality and relevance. Untangle this. + bool Deprecated = false; + unsigned References = 0; + + void merge(const CodeCompletionResult &SemaCCResult); + void merge(const Symbol &IndexResult); + + // Condense these signals down to a single number, higher is better. + float evaluate() const; +}; +llvm::raw_ostream &operator<<(llvm::raw_ostream &, + const SymbolQualitySignals &); + +/// Attributes of a symbol-query pair that affect how much we like it. +struct SymbolRelevanceSignals { + // 0-1 fuzzy-match score for unqualified name. Must be explicitly assigned. + float NameMatch = 1; + bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). + + void merge(const CodeCompletionResult &SemaResult); + + // Condense these signals down to a single number, higher is better. + float evaluate() const; +}; +llvm::raw_ostream &operator<<(llvm::raw_ostream &, + const SymbolRelevanceSignals &); + +/// Combine symbol quality and relevance into a single score. +float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); + +/// TopN is a lossy container that preserves only the "best" N elements. +template > class TopN { +public: + using value_type = T; + TopN(size_t N, Compare Greater = Compare()) + : N(N), Greater(std::move(Greater)) {} + + // Adds a candidate to the set. + // Returns true if a candidate was dropped to get back under N. + bool push(value_type &&V) { + bool Dropped = false; + if (Heap.size() >= N) { + Dropped = true; + if (N > 0 && Greater(V, Heap.front())) { + std::pop_heap(Heap.begin(), Heap.end(), Greater); + Heap.back() = std::move(V); + std::push_heap(Heap.begin(), Heap.end(), Greater); + } + } else { + Heap.push_back(std::move(V)); + std::push_heap(Heap.begin(), Heap.end(), Greater); + } + assert(Heap.size() <= N); + assert(std::is_heap(Heap.begin(), Heap.end(), Greater)); + return Dropped; + } + + // Returns candidates from best to worst. + std::vector items() && { + std::sort_heap(Heap.begin(), Heap.end(), Greater); + assert(Heap.size() <= N); + return std::move(Heap); + } + +private: + const size_t N; + std::vector Heap; // Min-heap, comparator is Greater. + Compare Greater; +}; + +/// Returns a string that sorts in the same order as (-Score, Tiebreak), for LSP. +/// (The highest score compares smallest so it sorts at the top). +std::string sortText(float Score, llvm::StringRef Tiebreak = ""); + +} // namespace clangd +} // namespace clang + +#endif Index: clangd/Quality.cpp =================================================================== --- /dev/null +++ clangd/Quality.cpp @@ -0,0 +1,108 @@ +//===--- Quality.cpp --------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +#include "Quality.h" +#include "index/Index.h" +#include "clang/Sema/CodeCompleteConsumer.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace clangd { +using namespace llvm; + +void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) { + SemaCCPriority = SemaCCResult.Priority; + + if (SemaCCResult.Availability == CXAvailability_Deprecated) + Deprecated = true; +} + +void SymbolQualitySignals::merge(const Symbol &IndexResult) { + References = std::max(IndexResult.References, References); +} + +float SymbolQualitySignals::evaluate() const { + float Score = 1; + + // This avoids a sharp gradient for tail symbols, and also neatly avoids the + // question of whether 0 references means a bad symbol or missing data. + if (References >= 3) + Score *= std::log(References); + + if (SemaCCPriority) + // Map onto a 0-2 interval, so we don't reward/penalize non-Sema results. + // Priority 80 is a really bad score. + Score *= 2 - std::min(80, SemaCCPriority) / 40; + + if (Deprecated) + Score *= 0.1; + + return Score; +} + +raw_ostream &operator<<(raw_ostream &OS, const SymbolQualitySignals &S) { + OS << formatv("=== Symbol quality: {0}\n", S.evaluate()); + if (S.SemaCCPriority) + OS << formatv("\tSemaCCPriority: {0}\n", S.SemaCCPriority); + OS << formatv("\tReferences: {0}\n", S.References); + OS << formatv("\tDeprecated: {0}\n", S.Deprecated); + return OS; +} + +void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) { + if (SemaCCResult.Availability == CXAvailability_NotAvailable || + SemaCCResult.Availability == CXAvailability_NotAccessible) + Forbidden = true; +} + +float SymbolRelevanceSignals::evaluate() const { + if (Forbidden) + return 0; + return NameMatch; +} +raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) { + OS << formatv("=== Symbol relevance: {0}\n", S.evaluate()); + OS << formatv("\tName match: {0}\n", S.NameMatch); + OS << formatv("\tForbidden: {0}\n", S.Forbidden); + return OS; +} + +float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) { + return SymbolQuality * SymbolRelevance; +} + +// Produces an integer that sorts in the same order as F. +// That is: a < b <==> encodeFloat(a) < encodeFloat(b). +static uint32_t encodeFloat(float F) { + static_assert(std::numeric_limits::is_iec559, ""); + constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1); + + // Get the bits of the float. Endianness is the same as for integers. + uint32_t U = FloatToBits(F); + // IEEE 754 floats compare like sign-magnitude integers. + if (U & TopBit) // Negative float. + return 0 - U; // Map onto the low half of integers, order reversed. + return U + TopBit; // Positive floats map onto the high half of integers. +} + +std::string sortText(float Score, llvm::StringRef Name) { + // We convert -Score to an integer, and hex-encode for readability. + // Example: [0.5, "foo"] -> "41000000foo" + std::string S; + llvm::raw_string_ostream OS(S); + write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower, + /*Width=*/2 * sizeof(Score)); + OS << Name; + OS.flush(); + return S; +} + +} // namespace clangd +} // namespace clang Index: unittests/clangd/CMakeLists.txt =================================================================== --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -23,10 +23,12 @@ HeadersTests.cpp IndexTests.cpp JSONExprTests.cpp + QualityTests.cpp SourceCodeTests.cpp SymbolCollectorTests.cpp SyncAPI.cpp TestFS.cpp + TestTU.cpp ThreadingTests.cpp TraceTests.cpp TUSchedulerTests.cpp Index: unittests/clangd/ClangdUnitTests.cpp =================================================================== --- unittests/clangd/ClangdUnitTests.cpp +++ unittests/clangd/ClangdUnitTests.cpp @@ -10,10 +10,7 @@ #include "Annotations.h" #include "ClangdUnit.h" #include "SourceCode.h" -#include "TestFS.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/PCHContainerOperations.h" -#include "clang/Frontend/Utils.h" +#include "TestTU.h" #include "llvm/Support/ScopedPrinter.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -36,24 +33,6 @@ return Field(&Diag::Notes, ElementsAre(NoteMatcher)); } -// FIXME: this is duplicated with FileIndexTests. Share it. -ParsedAST build(StringRef Code, std::vector Flags = {}) { - std::vector Cmd = {"clang", "main.cpp"}; - Cmd.insert(Cmd.begin() + 1, Flags.begin(), Flags.end()); - auto CI = createInvocationFromCommandLine(Cmd); - auto Buf = MemoryBuffer::getMemBuffer(Code); - auto AST = ParsedAST::Build(std::move(CI), nullptr, std::move(Buf), - std::make_shared(), - vfs::getRealFileSystem()); - assert(AST.hasValue()); - return std::move(*AST); -} - -std::vector buildDiags(llvm::StringRef Code, - std::vector Flags = {}) { - return build(Code, std::move(Flags)).getDiagnostics(); -} - MATCHER_P2(Diag, Range, Message, "Diag at " + llvm::to_string(Range) + " = [" + Message + "]") { return arg.Range == Range && arg.Message == Message; @@ -105,7 +84,7 @@ } )cpp"); EXPECT_THAT( - buildDiags(Test.code()), + TestTU::withCode(Test.code()).build().getDiagnostics(), ElementsAre( // This range spans lines. AllOf(Diag(Test.range("typo"), @@ -123,13 +102,15 @@ TEST(DiagnosticsTest, FlagsMatter) { Annotations Test("[[void]] main() {}"); - EXPECT_THAT(buildDiags(Test.code()), + auto TU = TestTU::withCode(Test.code()); + EXPECT_THAT(TU.build().getDiagnostics(), ElementsAre(AllOf(Diag(Test.range(), "'main' must return 'int'"), WithFix(Fix(Test.range(), "int", "change 'void' to 'int'"))))); // Same code built as C gets different diagnostics. + TU.Filename = "Plain.c"; EXPECT_THAT( - buildDiags(Test.code(), {"-x", "c"}), + TU.build().getDiagnostics(), ElementsAre(AllOf( Diag(Test.range(), "return type of 'main' is not 'int'"), WithFix(Fix(Test.range(), "int", "change return type to 'int'"))))); @@ -150,7 +131,7 @@ #endif )cpp"); EXPECT_THAT( - buildDiags(Test.code()), + TestTU::withCode(Test.code()).build().getDiagnostics(), ElementsAre(Diag(Test.range(), "use of undeclared identifier 'b'"))); } @@ -229,7 +210,7 @@ "int ^λλ^λ();", // UTF-8 handled properly when backing up }) { Annotations TestCase(Text); - auto AST = build(TestCase.code()); + auto AST = TestTU::withCode(TestCase.code()).build(); const auto &SourceMgr = AST.getASTContext().getSourceManager(); SourceLocation Actual = getBeginningOfIdentifier( AST, TestCase.points().back(), SourceMgr.getMainFileID()); Index: unittests/clangd/FileIndexTests.cpp =================================================================== --- unittests/clangd/FileIndexTests.cpp +++ unittests/clangd/FileIndexTests.cpp @@ -7,11 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "TestFS.h" +#include "TestTU.h" #include "index/FileIndex.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/PCHContainerOperations.h" -#include "clang/Frontend/Utils.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -83,38 +80,19 @@ return Matches; } -/// Create an ParsedAST for \p Code. Returns None if \p Code is empty. -/// \p Code is put into .h which is included by \p .cpp. -llvm::Optional build(llvm::StringRef BasePath, - llvm::StringRef Code) { - if (Code.empty()) - return llvm::None; - - assert(llvm::sys::path::extension(BasePath).empty() && - "BasePath must be a base file path without extension."); - llvm::IntrusiveRefCntPtr VFS( - new vfs::InMemoryFileSystem); - std::string Path = testPath((BasePath + ".cpp").str()); - std::string Header = testPath((BasePath + ".h").str()); - VFS->addFile(Path, 0, llvm::MemoryBuffer::getMemBuffer("")); - VFS->addFile(Header, 0, llvm::MemoryBuffer::getMemBuffer(Code)); - const char *Args[] = {"clang", "-xc++", "-include", Header.c_str(), - Path.c_str()}; - - auto CI = createInvocationFromCommandLine(Args); - - auto Buf = llvm::MemoryBuffer::getMemBuffer(Code); - auto AST = ParsedAST::Build(std::move(CI), nullptr, std::move(Buf), - std::make_shared(), VFS); - assert(AST.hasValue()); - return std::move(*AST); +// Adds Basename.cpp, which includes Basename.h, which contains Code. +void update(FileIndex &M, llvm::StringRef Basename, llvm::StringRef Code) { + TestTU File; + File.Filename = (Basename + ".cpp").str(); + File.HeaderFilename = (Basename + ".h").str(); + File.HeaderCode = Code; + auto AST = File.build(); + M.update(File.Filename, &AST); } TEST(FileIndexTest, IndexAST) { FileIndex M; - M.update( - "f1", - build("f1", "namespace ns { void f() {} class X {}; }").getPointer()); + update(M, "f1", "namespace ns { void f() {} class X {}; }"); FuzzyFindRequest Req; Req.Query = ""; @@ -124,10 +102,7 @@ TEST(FileIndexTest, NoLocal) { FileIndex M; - M.update( - "f1", - build("f1", "namespace ns { void f() { int local = 0; } class X {}; }") - .getPointer()); + update(M, "f1", "namespace ns { void f() { int local = 0; } class X {}; }"); FuzzyFindRequest Req; Req.Query = ""; @@ -136,12 +111,8 @@ TEST(FileIndexTest, IndexMultiASTAndDeduplicate) { FileIndex M; - M.update( - "f1", - build("f1", "namespace ns { void f() {} class X {}; }").getPointer()); - M.update( - "f2", - build("f2", "namespace ns { void ff() {} class X {}; }").getPointer()); + update(M, "f1", "namespace ns { void f() {} class X {}; }"); + update(M, "f2", "namespace ns { void ff() {} class X {}; }"); FuzzyFindRequest Req; Req.Query = ""; @@ -151,30 +122,26 @@ TEST(FileIndexTest, RemoveAST) { FileIndex M; - M.update( - "f1", - build("f1", "namespace ns { void f() {} class X {}; }").getPointer()); + update(M, "f1", "namespace ns { void f() {} class X {}; }"); FuzzyFindRequest Req; Req.Query = ""; Req.Scopes = {"ns::"}; EXPECT_THAT(match(M, Req), UnorderedElementsAre("ns::f", "ns::X")); - M.update("f1", nullptr); + M.update("f1.cpp", nullptr); EXPECT_THAT(match(M, Req), UnorderedElementsAre()); } TEST(FileIndexTest, RemoveNonExisting) { FileIndex M; - M.update("no", nullptr); + M.update("no.cpp", nullptr); EXPECT_THAT(match(M, FuzzyFindRequest()), UnorderedElementsAre()); } TEST(FileIndexTest, IgnoreClassMembers) { FileIndex M; - M.update("f1", - build("f1", "class X { static int m1; int m2; static void f(); };") - .getPointer()); + update(M, "f1", "class X { static int m1; int m2; static void f(); };"); FuzzyFindRequest Req; Req.Query = ""; @@ -183,7 +150,7 @@ TEST(FileIndexTest, NoIncludeCollected) { FileIndex M; - M.update("f", build("f", "class string {};").getPointer()); + update(M, "f", "class string {};"); FuzzyFindRequest Req; Req.Query = ""; @@ -206,7 +173,7 @@ )cpp"; FileIndex M; - M.update("f", build("f", Source).getPointer()); + update(M, "f", Source); FuzzyFindRequest Req; Req.Query = ""; Index: unittests/clangd/QualityTests.cpp =================================================================== --- /dev/null +++ unittests/clangd/QualityTests.cpp @@ -0,0 +1,123 @@ +//===-- SourceCodeTests.cpp ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Evaluating scoring functions isn't a great fit for assert-based tests. +// For interesting cases, both exact scores and "X beats Y" are too brittle to +// make good hard assertions. +// +// Here we test the signal extraction and sanity-check that signals point in +// the right direction. This should be supplemented by quality metrics which +// we can compute from a corpus of queries and preferred rankings. +// +//===----------------------------------------------------------------------===// + +#include "Quality.h" +#include "TestTU.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { +namespace { + +TEST(QualityTests, SymbolQualitySignalExtraction) { + auto Header = TestTU::withHeaderCode(R"cpp( + int x; + + [[deprecated]] + int f() { return x; } + )cpp"); + auto Symbols = Header.headerSymbols(); + auto AST = Header.build(); + + SymbolQualitySignals Quality; + Quality.merge(findSymbol(Symbols, "x")); + EXPECT_FALSE(Quality.Deprecated); + EXPECT_EQ(Quality.SemaCCPriority, SymbolQualitySignals().SemaCCPriority); + EXPECT_EQ(Quality.References, SymbolQualitySignals().References); + + Symbol F = findSymbol(Symbols, "f"); + F.References = 24; // TestTU doesn't count references, so fake it. + Quality = {}; + Quality.merge(F); + EXPECT_FALSE(Quality.Deprecated); // FIXME: Include deprecated bit in index. + EXPECT_EQ(Quality.SemaCCPriority, SymbolQualitySignals().SemaCCPriority); + EXPECT_EQ(Quality.References, 24u); + + Quality = {}; + Quality.merge(CodeCompletionResult(&findDecl(AST, "f"), /*Priority=*/42)); + EXPECT_TRUE(Quality.Deprecated); + EXPECT_EQ(Quality.SemaCCPriority, 42u); + EXPECT_EQ(Quality.References, SymbolQualitySignals().References); +} + +TEST(QualityTests, SymbolRelevanceSignalExtraction) { + auto AST = TestTU::withHeaderCode(R"cpp( + [[deprecated]] + int f() { return 0; } + )cpp") + .build(); + + SymbolRelevanceSignals Relevance; + Relevance.merge(CodeCompletionResult(&findDecl(AST, "f"), /*Priority=*/42, + nullptr, false, /*Accessible=*/false)); + EXPECT_EQ(Relevance.NameMatch, SymbolRelevanceSignals().NameMatch); + EXPECT_TRUE(Relevance.Forbidden); +} + +// Do the signals move the scores in the direction we expect? +TEST(QualityTests, SymbolQualitySignalsSanity) { + SymbolQualitySignals Default; + EXPECT_EQ(Default.evaluate(), 1); + + SymbolQualitySignals Deprecated; + Deprecated.Deprecated = true; + EXPECT_LT(Deprecated.evaluate(), Default.evaluate()); + + SymbolQualitySignals WithReferences, ManyReferences; + WithReferences.References = 10; + ManyReferences.References = 1000; + EXPECT_GT(WithReferences.evaluate(), Default.evaluate()); + EXPECT_GT(ManyReferences.evaluate(), WithReferences.evaluate()); + + SymbolQualitySignals LowPriority, HighPriority; + LowPriority.SemaCCPriority = 60; + HighPriority.SemaCCPriority = 20; + EXPECT_GT(HighPriority.evaluate(), Default.evaluate()); + EXPECT_LT(LowPriority.evaluate(), Default.evaluate()); +} + +TEST(QualityTests, SymbolRelevanceSignalsSanity) { + SymbolRelevanceSignals Default; + EXPECT_EQ(Default.evaluate(), 1); + + SymbolRelevanceSignals Forbidden; + Forbidden.Forbidden = true; + EXPECT_LT(Forbidden.evaluate(), Default.evaluate()); + + SymbolRelevanceSignals PoorNameMatch; + PoorNameMatch.NameMatch = 0.2; + EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate()); +} + +TEST(QualityTests, SortText) { + EXPECT_LT(sortText(std::numeric_limits::infinity()), sortText(1000.2)); + EXPECT_LT(sortText(1000.2), sortText(1)); + EXPECT_LT(sortText(1), sortText(0.3)); + EXPECT_LT(sortText(0.3), sortText(0)); + EXPECT_LT(sortText(0), sortText(-10)); + EXPECT_LT(sortText(-10), sortText(-std::numeric_limits::infinity())); + + EXPECT_LT(sortText(1, "z"), sortText(0, "a")); + EXPECT_LT(sortText(0, "a"), sortText(0, "z")); +} + +} // namespace +} // namespace clangd +} // namespace clang Index: unittests/clangd/TestFS.cpp =================================================================== --- unittests/clangd/TestFS.cpp +++ unittests/clangd/TestFS.cpp @@ -20,8 +20,8 @@ new vfs::InMemoryFileSystem); for (auto &FileAndContents : Files) { MemFS->addFile(FileAndContents.first(), time_t(), - MemoryBuffer::getMemBuffer(FileAndContents.second, - FileAndContents.first())); + MemoryBuffer::getMemBufferCopy(FileAndContents.second, + FileAndContents.first())); } return MemFS; } Index: unittests/clangd/TestTU.h =================================================================== --- /dev/null +++ unittests/clangd/TestTU.h @@ -0,0 +1,59 @@ +//===--- TestTU.h - Scratch source files for testing ------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// Many tests for indexing, code completion etc are most naturally expressed +// using code examples. +// TestTU lets test define these examples in a common way without dealing with +// the mechanics of VFS and compiler interactions, and then easily grab the +// AST, particular symbols, etc. +// +//===---------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_TESTTU_H +#define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_TESTTU_H +#include "ClangdUnit.h" +#include "index/Index.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { + +struct TestTU { + static TestTU withCode(llvm::StringRef Code) { + TestTU TU; + TU.Code = Code; + return TU; + } + + static TestTU withHeaderCode(llvm::StringRef HeaderCode) { + TestTU TU; + TU.HeaderCode = HeaderCode; + return TU; + } + + // The code to be compiled. + std::string Code; + std::string Filename = "TestTU.cpp"; + + // Define contents of a header to be included by TestTU.cpp. + std::string HeaderCode; + std::string HeaderFilename = "TestTU.h"; + + ParsedAST build() const; + SymbolSlab headerSymbols() const; + std::unique_ptr index() const; +}; + +// Look up an index symbol by qualified name, which must be unique. +const Symbol &findSymbol(const SymbolSlab &, llvm::StringRef QName); +// Look up an AST symbol by qualified name, which must be unique and top-level. +const NamedDecl &findDecl(ParsedAST &AST, llvm::StringRef QName); + +} // namespace clangd +} // namespace clang +#endif Index: unittests/clangd/TestTU.cpp =================================================================== --- /dev/null +++ unittests/clangd/TestTU.cpp @@ -0,0 +1,95 @@ +//===--- TestTU.cpp - Scratch source files for testing ------------*- +//C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +#include "TestTU.h" +#include "TestFS.h" +#include "index/FileIndex.h" +#include "index/MemIndex.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/PCHContainerOperations.h" +#include "clang/Frontend/Utils.h" + +namespace clang { +namespace clangd { +using namespace llvm; + +ParsedAST TestTU::build() const { + std::string FullFilename = testPath(Filename), + FullHeaderName = testPath(HeaderFilename); + std::vector Cmd = {"clang", FullFilename.c_str()}; + // FIXME: this shouldn't need to be conditional, but it breaks a + // GoToDefinition test for some reason (getMacroArgExpandedLocation fails). + if (!HeaderCode.empty()) { + Cmd.push_back("-include"); + Cmd.push_back(FullHeaderName.c_str()); + } + auto AST = ParsedAST::Build( + createInvocationFromCommandLine(Cmd), nullptr, + MemoryBuffer::getMemBufferCopy(Code), + std::make_shared(), + buildTestFS({{FullFilename, Code}, {FullHeaderName, HeaderCode}})); + if (!AST.hasValue()) { + ADD_FAILURE() << "Failed to build code:\n" << Code; + llvm_unreachable("Failed to build TestTU!"); + } + return std::move(*AST); +} + +SymbolSlab TestTU::headerSymbols() const { + auto AST = build(); + return indexAST(&AST); +} + +std::unique_ptr TestTU::index() const { + return MemIndex::build(headerSymbols()); +} + +// Look up a symbol by qualified name, which must be unique. +const Symbol &findSymbol(const SymbolSlab &Slab, llvm::StringRef QName) { + const Symbol *Result = nullptr; + for (const Symbol &S : Slab) { + if (QName != (S.Scope + S.Name).str()) + continue; + if (Result) { + ADD_FAILURE() << "Multiple symbols named " << QName << ":\n" + << *Result << "\n---\n" + << S; + assert(false && "QName is not unique"); + } + Result = &S; + } + if (!Result) { + ADD_FAILURE() << "No symbol named " << QName << " in " + << ::testing::PrintToString(Slab); + assert(false && "No symbol with QName"); + } + return *Result; +} + +const NamedDecl &findDecl(ParsedAST &AST, llvm::StringRef QName) { + const NamedDecl *Result = nullptr; + for (const Decl *D : AST.getTopLevelDecls()) { + auto *ND = dyn_cast(D); + if (!ND || ND->getNameAsString() != QName) + continue; + if (Result) { + ADD_FAILURE() << "Multiple Decls named " << QName; + assert(false && "QName is not unique"); + } + Result = ND; + } + if (!Result) { + ADD_FAILURE() << "No Decl named " << QName << " in AST"; + assert(false && "No Decl with QName"); + } + return *Result; +} + +} // namespace clangd +} // namespace clang Index: unittests/clangd/XRefsTests.cpp =================================================================== --- unittests/clangd/XRefsTests.cpp +++ unittests/clangd/XRefsTests.cpp @@ -12,15 +12,13 @@ #include "Matchers.h" #include "SyncAPI.h" #include "TestFS.h" +#include "TestTU.h" #include "XRefs.h" -#include "gmock/gmock.h" #include "index/FileIndex.h" #include "index/SymbolCollector.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/PCHContainerOperations.h" -#include "clang/Frontend/Utils.h" #include "clang/Index/IndexingAction.h" #include "llvm/Support/Path.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" namespace clang { @@ -39,34 +37,6 @@ std::vector Diagnostics) override {} }; -// FIXME: this is duplicated with FileIndexTests. Share it. -ParsedAST build(StringRef MainCode, StringRef HeaderCode = "") { - auto HeaderPath = testPath("foo.h"); - auto MainPath = testPath("foo.cpp"); - llvm::IntrusiveRefCntPtr VFS( - new vfs::InMemoryFileSystem()); - VFS->addFile(MainPath, 0, llvm::MemoryBuffer::getMemBuffer(MainCode)); - VFS->addFile(HeaderPath, 0, llvm::MemoryBuffer::getMemBuffer(HeaderCode)); - std::vector Cmd = {"clang", "-xc++", MainPath.c_str()}; - if (!HeaderCode.empty()) { - std::vector args = {"-include", HeaderPath.c_str()}; - Cmd.insert(Cmd.begin() + 1, args.begin(), args.end()); - } - auto CI = createInvocationFromCommandLine(Cmd); - - auto Buf = MemoryBuffer::getMemBuffer(MainCode); - auto AST = ParsedAST::Build(std::move(CI), nullptr, std::move(Buf), - std::make_shared(), VFS); - assert(AST.hasValue()); - return std::move(*AST); -} - -std::unique_ptr buildIndex(StringRef MainCode, - StringRef HeaderCode) { - auto AST = build(MainCode, HeaderCode); - return MemIndex::build(indexAST(&AST)); -} - // Extracts ranges from an annotated example, and constructs a matcher for a // highlight set. Ranges should be named $read/$write as appropriate. Matcher &> @@ -117,7 +87,7 @@ }; for (const char *Test : Tests) { Annotations T(Test); - auto AST = build(T.code()); + auto AST = TestTU::withCode(T.code()).build(); EXPECT_THAT(findDocumentHighlights(AST, T.point()), HighlightsFrom(T)) << Test; } @@ -139,10 +109,12 @@ void $f1[[f1]]() {} )cpp"); - auto Index = buildIndex(SymbolCpp.code(), SymbolHeader.code()); + TestTU TU; + TU.Code = SymbolCpp.code(); + TU.HeaderCode = SymbolHeader.code(); + auto Index = TU.index(); auto runFindDefinitionsWithIndex = [&Index](const Annotations &Main) { - auto AST = build(/*MainCode=*/Main.code(), - /*HeaderCode=*/""); + auto AST = TestTU::withCode(Main.code()).build(); return clangd::findDefinitions(AST, Main.point(), Index.get()); }; @@ -329,7 +301,7 @@ }; for (const char *Test : Tests) { Annotations T(Test); - auto AST = build(T.code()); + auto AST = TestTU::withCode(T.code()).build(); std::vector> ExpectedLocations; for (const auto &R : T.ranges()) ExpectedLocations.push_back(RangeIs(R)); @@ -661,7 +633,7 @@ for (const OneTest &Test : Tests) { Annotations T(Test.Input); - auto AST = build(T.code()); + auto AST = TestTU::withCode(T.code()).build(); Hover H = getHover(AST, T.point()); EXPECT_EQ(H.contents.value, Test.ExpectedHover) << Test.Input;