diff --git a/clang-tools-extra/clangd/FuzzyMatch.cpp b/clang-tools-extra/clangd/FuzzyMatch.cpp --- a/clang-tools-extra/clangd/FuzzyMatch.cpp +++ b/clang-tools-extra/clangd/FuzzyMatch.cpp @@ -71,7 +71,7 @@ // Score field is 15 bits wide, min value is -2^14, we use half of that. static constexpr int AwfulScore = -(1 << 13); static bool isAwful(int S) { return S < AwfulScore / 2; } -static constexpr int PerfectBonus = 3; // Perfect per-pattern-char score. +static constexpr int PerfectBonus = 4; // Perfect per-pattern-char score. FuzzyMatcher::FuzzyMatcher(llvm::StringRef Pattern) : PatN(std::min(MaxPat, Pattern.size())), @@ -267,24 +267,31 @@ } int FuzzyMatcher::skipPenalty(int W, Action Last) const { - int S = 0; + if (W == 0) // Skipping the first character. + return 3; if (WordRole[W] == Head) // Skipping a segment. - S += 1; - if (Last == Match) // Non-consecutive match. - S += 2; // We'd rather skip a segment than split our match. - return S; + return 1; // We want to keep this lower than a consecutive match bonus. + // Instead of penalizing non-consecutive matches, we give a bonus to a + // consecutive match in matchBonus. This produces a better score distribution + // than penalties in case of small patterns, e.g. 'up' for 'unique_ptr'. + return 0; } int FuzzyMatcher::matchBonus(int P, int W, Action Last) const { assert(LowPat[P] == LowWord[W]); int S = 1; - // Bonus: pattern so far is a (case-insensitive) prefix of the word. - if (P == W) // We can't skip pattern characters, so we must have matched all. - ++S; + bool IsPatSingleCase = + (PatTypeSet == 1 << Lower) || (PatTypeSet == 1 << Upper); // Bonus: case matches, or a Head in the pattern aligns with one in the word. - if ((Pat[P] == Word[W] && ((PatTypeSet & 1 << Upper) || P == W)) || - (PatRole[P] == Head && WordRole[W] == Head)) + // Single-case patterns lack segmentation signals and we assume any character + // can be a head of a segment. + if (Pat[P] == Word[W] || + (WordRole[W] == Head && (IsPatSingleCase || PatRole[P] == Head))) ++S; + // Bonus: a consecutive match. First character match also gets a bonus to + // ensure prefix final match score normalizes to 1.0. + if (W == 0 || Last == Match) + S += 2; // Penalty: matching inside a segment (and previous char wasn't matched). if (WordRole[W] == Tail && P && Last == Miss) S -= 3; diff --git a/clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp b/clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp --- a/clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp +++ b/clang-tools-extra/unittests/clangd/FuzzyMatchTests.cpp @@ -9,6 +9,7 @@ #include "FuzzyMatch.h" #include "llvm/ADT/StringExtras.h" +#include "gmock/gmock-matchers.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -247,6 +248,8 @@ EXPECT_THAT("foo", ranks("[foo]", "[Foo]")); EXPECT_THAT("onMes", ranks("[onMes]sage", "[onmes]sage", "[on]This[M]ega[Es]capes")); + EXPECT_THAT("onmes", + ranks("[onmes]sage", "[onMes]sage", "[on]This[M]ega[Es]capes")); EXPECT_THAT("CC", ranks("[C]amel[C]ase", "[c]amel[C]ase")); EXPECT_THAT("cC", ranks("[c]amel[C]ase", "[C]amel[C]ase")); EXPECT_THAT("p", ranks("[p]", "[p]arse", "[p]osix", "[p]afdsa", "[p]ath")); @@ -270,12 +273,18 @@ // Verify some bounds so we know scores fall in the right range. // Testing exact scores is fragile, so we prefer Ranking tests. TEST(FuzzyMatch, Scoring) { - EXPECT_THAT("abs", matches("[a]w[B]xYz[S]", 0.f)); + EXPECT_THAT("abs", matches("[a]w[B]xYz[S]", 7.f / 12.f)); EXPECT_THAT("abs", matches("[abs]l", 1.f)); EXPECT_THAT("abs", matches("[abs]", 2.f)); EXPECT_THAT("Abs", matches("[abs]", 2.f)); } +TEST(FuzzyMatch, InitialismAndPrefix) { + // We want these scores to be roughly the same. + EXPECT_THAT("up", matches("[u]nique_[p]tr", 3.f / 4.f)); + EXPECT_THAT("up", matches("[up]per_bound", 1.f)); +} + // Returns pretty-printed segmentation of Text. // e.g. std::basic_string --> +-- +---- +----- std::string segment(llvm::StringRef Text) {