Index: clangd/FuzzyMatch.h =================================================================== --- clangd/FuzzyMatch.h +++ clangd/FuzzyMatch.h @@ -56,16 +56,17 @@ bool init(llvm::StringRef Word); void buildGraph(); - void calculateRoles(const char *Text, CharRole *Out, int N); - int skipPenalty(int W, Action Last); - int matchBonus(int P, int W, Action Last); + void calculateRoles(const char *Text, CharRole *Out, int &Types, int N); + bool allowMatch(int P, int W) const; + int skipPenalty(int W, Action Last) const; + int matchBonus(int P, int W, Action Last) const; // Pattern data is initialized by the constructor, then constant. char Pat[MaxPat]; // Pattern data int PatN; // Length char LowPat[MaxPat]; // Pattern in lowercase CharRole PatRole[MaxPat]; // Pattern segmentation info - bool CaseSensitive; // Case-sensitive match if pattern has uppercase + int PatTypeSet; // Bitmask of 1<(MaxPat, Pattern.size())), CaseSensitive(false), + : PatN(std::min(MaxPat, Pattern.size())), ScoreScale(PatN ? float{1} / (PerfectBonus * PatN) : 0), WordN(0) { std::copy(Pattern.begin(), Pattern.begin() + PatN, Pat); - for (int I = 0; I < PatN; ++I) { + for (int I = 0; I < PatN; ++I) LowPat[I] = lower(Pat[I]); - CaseSensitive |= LowPat[I] != Pat[I]; - } Scores[0][0][Miss] = {0, Miss}; Scores[0][0][Match] = {AwfulScore, Miss}; for (int P = 0; P <= PatN; ++P) @@ -88,7 +88,7 @@ for (Action A : {Miss, Match}) Scores[P][W][A] = {AwfulScore, Miss}; if (PatN > 0) - calculateRoles(Pat, PatRole, PatN); + calculateRoles(Pat, PatRole, PatTypeSet, PatN); } Optional FuzzyMatcher::match(StringRef Word) { @@ -177,16 +177,21 @@ template static T packedLookup(const uint8_t *Data, int I) { return static_cast((Data[I >> 2] >> ((I & 3) * 2)) & 3); } -void FuzzyMatcher::calculateRoles(const char *Text, CharRole *Out, int N) { +void FuzzyMatcher::calculateRoles(const char *Text, CharRole *Out, int &TypeSet, + int N) { assert(N > 0); + CharType Type = packedLookup(CharTypes, Text[0]); + TypeSet = 1 << Type; // Types holds a sliding window of (Prev, Curr, Next) types. // Initial value is (Empty, Empty, type of Text[0]). - int Types = packedLookup(CharTypes, Text[0]); + int Types = Type; // Rotate slides in the type of the next character. auto Rotate = [&](CharType T) { Types = ((Types << 2) | T) & 0x3f; }; for (int I = 0; I < N - 1; ++I) { // For each character, rotate in the next, and look up the role. - Rotate(packedLookup(CharTypes, Text[I + 1])); + Type = packedLookup(CharTypes, Text[I + 1]); + TypeSet |= 1 << Type; + Rotate(Type); *Out++ = packedLookup(CharRoles, Types); } // For the last character, the "next character" is Empty. @@ -214,7 +219,10 @@ ++P; } - calculateRoles(Word, WordRole, WordN); + // FIXME: some words are hard to tokenize algorithmically. + // e.g. vsprintf is V S Print F, and should match [pri] but not [int]. + // We could add a tokenization dictionary for common stdlib names. + calculateRoles(Word, WordRole, WordTypeSet, WordN); return true; } @@ -247,7 +255,7 @@ ? ScoreInfo{MatchMissScore, Match} : ScoreInfo{MissMissScore, Miss}; - if (LowPat[P] != LowWord[W]) { // No match possible. + if (!allowMatch(P, W)) { Score[Match] = {AwfulScore, Miss}; } else { auto &PreMatch = Scores[P][W]; @@ -261,7 +269,22 @@ } } -int FuzzyMatcher::skipPenalty(int W, Action Last) { +bool FuzzyMatcher::allowMatch(int P, int W) const { + if (LowPat[P] != LowWord[W]) + return false; + // We require a "strong" match for the first pattern character only. + if (P > 0) + return true; + // Obvious "strong match" for first char: match against a word head. + // We're banning matches outright, so conservatively accept some other cases + // where our segmentation might be wrong: + // - allow matching B in ABCDef (but not in NDEBUG) + // - we'd like to accept print in sprintf, but too many false positives + return WordRole[W] != Tail || + (Word[W] != LowWord[W] && WordTypeSet & 1 << Lower); +} + +int FuzzyMatcher::skipPenalty(int W, Action Last) const { int S = 0; if (WordRole[W] == Head) // Skipping a segment. S += 1; @@ -270,14 +293,14 @@ return S; } -int FuzzyMatcher::matchBonus(int P, int W, Action Last) { +int FuzzyMatcher::matchBonus(int P, int W, Action Last) const { assert(LowPat[P] == LowWord[W]); int S = 1; // Bonus: pattern so far is a (case-insensitive) prefix of the word. if (P == W) // We can't skip pattern characters, so we must have matched all. ++S; // Bonus: case matches, or a Head in the pattern aligns with one in the word. - if ((Pat[P] == Word[W] && (CaseSensitive || P == W)) || + if ((Pat[P] == Word[W] && ((PatTypeSet & 1 << Upper) || P == W)) || (PatRole[P] == Head && WordRole[W] == Head)) ++S; // Penalty: matching inside a segment (and previous char wasn't matched). @@ -312,7 +335,7 @@ Scores[PatN][WordN][Miss].Score))) { OS << "Substring check passed, but all matches are forbidden\n"; } - if (!CaseSensitive) + if (!(PatTypeSet & 1 << Upper)) OS << "Lowercase query, so scoring ignores case\n"; // Traverse Matched table backwards to reconstruct the Pattern/Word mapping. Index: unittests/clangd/FuzzyMatchTests.cpp =================================================================== --- unittests/clangd/FuzzyMatchTests.cpp +++ unittests/clangd/FuzzyMatchTests.cpp @@ -20,16 +20,27 @@ using testing::Not; struct ExpectedMatch { - ExpectedMatch(StringRef Annotated) : Word(Annotated), Annotated(Annotated) { + // Annotations are optional, and will not be asserted if absent. + ExpectedMatch(StringRef Match) : Word(Match), Annotated(Match) { for (char C : "[]") Word.erase(std::remove(Word.begin(), Word.end(), C), Word.end()); + if (Word.size() == Annotated->size()) + Annotated = None; + } + bool accepts(StringRef ActualAnnotated) const { + return !Annotated || ActualAnnotated == *Annotated; } std::string Word; - StringRef Annotated; + + friend raw_ostream &operator<<(raw_ostream &OS, const ExpectedMatch &M) { + return OS << "'" << M.Word; + if (M.Annotated) + OS << "' as " << *M.Annotated; + } + +private: + Optional Annotated; }; -raw_ostream &operator<<(raw_ostream &OS, const ExpectedMatch &M) { - return OS << "'" << M.Word << "' as " << M.Annotated; -} struct MatchesMatcher : public testing::MatcherInterface { ExpectedMatch Candidate; @@ -47,7 +58,7 @@ FuzzyMatcher Matcher(Pattern); auto Result = Matcher.match(Candidate.Word); auto AnnotatedMatch = Matcher.dumpLast(*OS << "\n"); - return Result && AnnotatedMatch == Candidate.Annotated; + return Result && Candidate.accepts(AnnotatedMatch); } }; @@ -122,6 +133,7 @@ EXPECT_THAT("sl", matches("[S]Visual[L]oggerLogsList")); EXPECT_THAT("sllll", matches("[S]Visua[lL]ogger[L]ogs[L]ist")); EXPECT_THAT("Three", matches("H[T]ML[HRE]l[e]ment")); + EXPECT_THAT("b", Not(matches("NDEBUG"))); EXPECT_THAT("Three", matches("[Three]")); EXPECT_THAT("fo", Not(matches("barfoo"))); EXPECT_THAT("fo", matches("bar_[fo]o")); @@ -151,8 +163,9 @@ EXPECT_THAT("g", matches("zz[G]roup")); EXPECT_THAT("aaaa", matches("_a_[aaaa]")); // Prefer consecutive. - EXPECT_THAT("printf", matches("s[printf]")); - EXPECT_THAT("str", matches("o[str]eam")); + // These would ideally match, but would need special segmentation rules. + EXPECT_THAT("printf", Not(matches("s[printf]"))); + EXPECT_THAT("str", Not(matches("o[str]eam"))); } struct RankMatcher : public testing::MatcherInterface { @@ -188,9 +201,8 @@ llvm::raw_string_ostream Info(Buf); auto AnnotatedMatch = Matcher.dumpLast(Info); - if (AnnotatedMatch != Str.Annotated) { - *OS << "\nMatched " << Str.Word << " as " << AnnotatedMatch - << " instead of " << Str.Annotated << "\n" + if (!Str.accepts(AnnotatedMatch)) { + *OS << "\nDoesn't match " << Str << ", but " << AnnotatedMatch << "\n" << Info.str(); Ok = false; } else if (LastScore && *LastScore < *Score) { Index: unittests/clangd/IndexTests.cpp =================================================================== --- unittests/clangd/IndexTests.cpp +++ unittests/clangd/IndexTests.cpp @@ -116,14 +116,6 @@ EXPECT_TRUE(Symbols.expired()); } -TEST(MemIndexTest, MemIndexMatchSubstring) { - MemIndex I; - I.build(generateNumSymbols(5, 25)); - FuzzyFindRequest Req; - Req.Query = "5"; - EXPECT_THAT(match(I, Req), UnorderedElementsAre("5", "15", "25")); -} - TEST(MemIndexTest, MemIndexDeduplicate) { auto Symbols = generateNumSymbols(0, 10); @@ -166,46 +158,46 @@ TEST(MemIndexTest, MatchQualifiedNamesWithoutSpecificScope) { MemIndex I; - I.build(generateSymbols({"a::xyz", "b::yz", "yz"})); + I.build(generateSymbols({"a::y1", "b::y2", "y3"})); FuzzyFindRequest Req; Req.Query = "y"; - EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::xyz", "b::yz", "yz")); + EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3")); } TEST(MemIndexTest, MatchQualifiedNamesWithGlobalScope) { MemIndex I; - I.build(generateSymbols({"a::xyz", "b::yz", "yz"})); + I.build(generateSymbols({"a::y1", "b::y2", "y3"})); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {""}; - EXPECT_THAT(match(I, Req), UnorderedElementsAre("yz")); + EXPECT_THAT(match(I, Req), UnorderedElementsAre("y3")); } TEST(MemIndexTest, MatchQualifiedNamesWithOneScope) { MemIndex I; - I.build(generateSymbols({"a::xyz", "a::yy", "a::xz", "b::yz", "yz"})); + I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"})); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a"}; - EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::xyz", "a::yy")); + EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2")); } TEST(MemIndexTest, MatchQualifiedNamesWithMultipleScopes) { MemIndex I; - I.build(generateSymbols({"a::xyz", "a::yy", "a::xz", "b::yz", "yz"})); + I.build(generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"})); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a", "b"}; - EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::xyz", "a::yy", "b::yz")); + EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3")); } TEST(MemIndexTest, NoMatchNestedScopes) { MemIndex I; - I.build(generateSymbols({"a::xyz", "a::b::yy"})); + I.build(generateSymbols({"a::y1", "a::b::y2"})); FuzzyFindRequest Req; Req.Query = "y"; Req.Scopes = {"a"}; - EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::xyz")); + EXPECT_THAT(match(I, Req), UnorderedElementsAre("a::y1")); } TEST(MemIndexTest, IgnoreCases) {