Index: clang-tools-extra/trunk/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/trunk/clangd/CMakeLists.txt +++ clang-tools-extra/trunk/clangd/CMakeLists.txt @@ -8,6 +8,7 @@ ClangdUnit.cpp ClangdUnitStore.cpp DraftStore.cpp + FuzzyMatch.cpp GlobalCompilationDatabase.cpp JSONExpr.cpp JSONRPCDispatcher.cpp Index: clang-tools-extra/trunk/clangd/FuzzyMatch.h =================================================================== --- clang-tools-extra/trunk/clangd/FuzzyMatch.h +++ clang-tools-extra/trunk/clangd/FuzzyMatch.h @@ -0,0 +1,84 @@ +//===--- FuzzyMatch.h - Approximate identifier matching ---------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements fuzzy-matching of strings against identifiers. +// It indicates both the existence and quality of a match: +// 'eb' matches both 'emplace_back' and 'embed', the former has a better score. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FUZZYMATCH_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FUZZYMATCH_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace clangd { + +// A matcher capable of matching and scoring strings against a single pattern. +// It's optimized for matching against many strings - match() does not allocate. +class FuzzyMatcher { +public: + // Characters beyond MaxPat are ignored. + FuzzyMatcher(llvm::StringRef Pattern); + + // If Word matches the pattern, return a score in [0,1] (higher is better). + // Characters beyond MaxWord are ignored. + llvm::Optional match(llvm::StringRef Word); + + // Dump internal state from the last match() to the stream, for debugging. + // Returns the pattern with [] around matched characters, e.g. + // [u_p] + "unique_ptr" --> "[u]nique[_p]tr" + llvm::SmallString<256> dumpLast(llvm::raw_ostream &) const; + +private: + // We truncate the pattern and the word to bound the cost of matching. + constexpr static int MaxPat = 63, MaxWord = 127; + enum CharRole : char; // For segmentation. + enum CharType : char; // For segmentation. + enum Action { Miss = 0, Match = 1 }; + + bool init(llvm::StringRef Word); + void buildGraph(); + void calculateRoles(const char *Text, CharRole *Out, int N); + int skipPenalty(int W, Action Last); + int matchBonus(int P, int W, Action Last); + + // Pattern data is initialized by the constructor, then constant. + char Pat[MaxPat]; // Pattern data + int PatN; // Length + char LowPat[MaxPat]; // Pattern in lowercase + CharRole PatRole[MaxPat]; // Pattern segmentation info + bool CaseSensitive; // Case-sensitive match if pattern has uppercase + float ScoreScale; // Normalizes scores for the pattern length. + + // Word data is initialized on each call to match(), mostly by init(). + char Word[MaxWord]; // Word data + int WordN; // Length + char LowWord[MaxWord]; // Word in lowercase + CharRole WordRole[MaxWord]; // Word segmentation info + bool WordContainsPattern; // Simple substring check + + // Cumulative best-match score table. + // Boundary conditions are filled in by the constructor. + // The rest is repopulated for each match(), by buildGraph(). + struct ScoreInfo { + signed int Score : 15; + Action Prev : 1; + }; + ScoreInfo Scores[MaxPat + 1][MaxWord + 1][/* Last Action */ 2]; +}; + +} // namespace clangd +} // namespace clang + +#endif Index: clang-tools-extra/trunk/clangd/FuzzyMatch.cpp =================================================================== --- clang-tools-extra/trunk/clangd/FuzzyMatch.cpp +++ clang-tools-extra/trunk/clangd/FuzzyMatch.cpp @@ -0,0 +1,373 @@ +//===--- FuzzyMatch.h - Approximate identifier matching ---------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// To check for a match between a Pattern ('u_p') and a Word ('unique_ptr'), +// we consider the possible partial match states: +// +// u n i q u e _ p t r +// +--------------------- +// |A . . . . . . . . . . +// u| +// |. . . . . . . . . . . +// _| +// |. . . . . . . O . . . +// p| +// |. . . . . . . . . . B +// +// Each dot represents some prefix of the pattern being matched against some +// prefix of the word. +// - A is the initial state: '' matched against '' +// - O is an intermediate state: 'u_' matched against 'unique_' +// - B is the target state: 'u_p' matched against 'unique_ptr' +// +// We aim to find the best path from A->B. +// - Moving right (consuming a word character) +// Always legal: not all word characters must match. +// - Moving diagonally (consuming both a word and pattern character) +// Legal if the characters match. +// - Moving down (consuming a pattern character) is never legal. +// Never legal: all pattern characters must match something. +// +// The scoring is based on heuristics: +// - when matching a character, apply a bonus or penalty depending on the +// match quality (does case match, do word segments align, etc) +// - when skipping a character, apply a penalty if it hurts the match +// (it starts a word segment, or splits the matched region, etc) +// +// These heuristics require the ability to "look backward" one character, to +// see whether it was matched or not. Therefore the dynamic-programming matrix +// has an extra dimension (last character matched). +// Each entry also has an additional flag indicating whether the last-but-one +// character matched, which is needed to trace back through the scoring table +// and reconstruct the match. +// +// We treat strings as byte-sequences, so only ASCII has first-class support. +// +// This algorithm was inspired by VS code's client-side filtering, and aims +// to be mostly-compatible. +// +//===----------------------------------------------------------------------===// + +#include "FuzzyMatch.h" +#include "llvm/ADT/Optional.h" +#include "llvm/Support/Format.h" + +using namespace llvm; +using namespace clang::clangd; + +const int FuzzyMatcher::MaxPat; +const int FuzzyMatcher::MaxWord; + +static char lower(char C) { return C >= 'A' && C <= 'Z' ? C + ('a' - 'A') : C; } +// A "negative infinity" score that won't overflow. +// We use this to mark unreachable states and forbidden solutions. +// Score field is 15 bits wide, min value is -2^14, we use half of that. +static constexpr int AwfulScore = -(1 << 13); +static bool isAwful(int S) { return S < AwfulScore / 2; } +static constexpr int PerfectBonus = 3; // Perfect per-pattern-char score. + +FuzzyMatcher::FuzzyMatcher(StringRef Pattern) + : PatN(std::min(MaxPat, Pattern.size())), CaseSensitive(false), + ScoreScale(float{1} / (PerfectBonus * PatN)), WordN(0) { + memcpy(Pat, Pattern.data(), PatN); + for (int I = 0; I < PatN; ++I) { + LowPat[I] = lower(Pat[I]); + CaseSensitive |= LowPat[I] != Pat[I]; + } + Scores[0][0][Miss] = {0, Miss}; + Scores[0][0][Match] = {AwfulScore, Miss}; + for (int P = 0; P <= PatN; ++P) + for (int W = 0; W < P; ++W) + for (Action A : {Miss, Match}) + Scores[P][W][A] = {AwfulScore, Miss}; + calculateRoles(Pat, PatRole, PatN); +} + +Optional FuzzyMatcher::match(StringRef Word) { + if (!PatN) + return 1; + if (!(WordContainsPattern = init(Word))) + return None; + buildGraph(); + auto Best = std::max(Scores[PatN][WordN][Miss].Score, + Scores[PatN][WordN][Match].Score); + if (isAwful(Best)) + return None; + return ScoreScale * std::min(PerfectBonus * PatN, std::max(0, Best)); +} + +// Segmentation of words and patterns. +// A name like "fooBar_baz" consists of several parts foo, bar, baz. +// Aligning segmentation of word and pattern improves the fuzzy-match. +// For example: [lol] matches "LaughingOutLoud" better than "LionPopulation" +// +// First we classify each character into types (uppercase, lowercase, etc). +// Then we look at the sequence: e.g. [upper, lower] is the start of a segment. + +// We only distinguish the types of characters that affect segmentation. +// It's not obvious how to segment digits, we treat them as lowercase letters. +// As we don't decode UTF-8, we treat bytes over 127 as lowercase too. +// This means we require exact (case-sensitive) match. +enum FuzzyMatcher::CharType : char { + Empty = 0, // Before-the-start and after-the-end (and control chars). + Lower = 1, // Lowercase letters, digits, and non-ASCII bytes. + Upper = 2, // Uppercase letters. + Punctuation = 3, // ASCII punctuation (including Space) +}; + +// We get CharTypes from a lookup table. Each is 2 bits, 4 fit in each byte. +// The top 6 bits of the char select the byte, the bottom 2 select the offset. +// e.g. 'q' = 010100 01 = byte 28 (55), bits 3-2 (01) -> Lower. +constexpr static uint8_t CharTypes[] = { + 0x00, 0x00, 0x00, 0x00, // Control characters + 0x00, 0x00, 0x00, 0x00, // Control characters + 0xff, 0xff, 0xff, 0xff, // Punctuation + 0x55, 0x55, 0xf5, 0xff, // Numbers->Lower, more Punctuation. + 0xab, 0xaa, 0xaa, 0xaa, // @ and A-O + 0xaa, 0xaa, 0xea, 0xff, // P-Z, more Punctuation. + 0x57, 0x55, 0x55, 0x55, // ` and a-o + 0x55, 0x55, 0xd5, 0x3f, // p-z, Punctuation, DEL. + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, // Bytes over 127 -> Lower. + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, // (probably UTF-8). + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, +}; + +// Each character's Role is the Head or Tail of a segment, or a Separator. +// e.g. XMLHttpRequest_Async +// +--+---+------ +---- +// ^Head ^Tail ^Separator +enum FuzzyMatcher::CharRole : char { + Unknown = 0, // Stray control characters or impossible states. + Tail = 1, // Part of a word segment, but not the first character. + Head = 2, // The first character of a word segment. + Separator = 3, // Punctuation characters that separate word segments. +}; + +// The Role can be determined from the Type of a character and its neighbors: +// +// Example | Chars | Type | Role +// ---------+--------------+----- +// F(o)oBar | Foo | Ull | Tail +// Foo(B)ar | oBa | lUl | Head +// (f)oo | ^fo | Ell | Head +// H(T)TP | HTT | UUU | Tail +// +// Our lookup table maps a 6 bit key (Prev, Curr, Next) to a 2-bit Role. +// A byte packs 4 Roles. (Prev, Curr) selects a byte, Next selects the offset. +// e.g. Lower, Upper, Lower -> 01 10 01 -> byte 6 (aa), bits 3-2 (10) -> Head. +constexpr static uint8_t CharRoles[] = { + // clang-format off + // Curr= Empty Lower Upper Separ + /* Prev=Empty */ 0x00, 0xaa, 0xaa, 0xff, // At start, Lower|Upper->Head + /* Prev=Lower */ 0x00, 0x55, 0xaa, 0xff, // In word, Upper->Head;Lower->Tail + /* Prev=Upper */ 0x00, 0x55, 0x59, 0xff, // Ditto, but U(U)U->Tail + /* Prev=Separ */ 0x00, 0xaa, 0xaa, 0xff, // After separator, like at start + // clang-format on +}; + +template static T packedLookup(const uint8_t *Data, int I) { + return static_cast((Data[I >> 2] >> ((I & 3) * 2)) & 3); +} +void FuzzyMatcher::calculateRoles(const char *Text, CharRole *Out, int N) { + // Types holds a sliding window of (Prev, Curr, Next) types. + // Initial value is (Empty, Empty, type of Text[0]). + int Types = packedLookup(CharTypes, Text[0]); + // Rotate slides in the type of the next character. + auto Rotate = [&](CharType T) { Types = ((Types << 2) | T) & 0x3f; }; + for (int I = 0; I < N - 1; ++I) { + // For each character, rotate in the next, and look up the role. + Rotate(packedLookup(CharTypes, Text[I + 1])); + *Out++ = packedLookup(CharRoles, Types); + } + // For the last character, the "next character" is Empty. + Rotate(Empty); + *Out++ = packedLookup(CharRoles, Types); +} + +// Sets up the data structures matching Word. +// Returns false if we can cheaply determine that no match is possible. +bool FuzzyMatcher::init(StringRef NewWord) { + WordN = std::min(MaxWord, NewWord.size()); + if (PatN > WordN) + return false; + memcpy(Word, NewWord.data(), WordN); + for (int I = 0; I < WordN; ++I) + LowWord[I] = lower(Word[I]); + + // Cheap subsequence check. + for (int W = 0, P = 0; P != PatN; ++W) { + if (W == WordN) + return false; + if (LowWord[W] == LowPat[P]) + ++P; + } + + calculateRoles(Word, WordRole, WordN); + return true; +} + +// The forwards pass finds the mappings of Pattern onto Word. +// Score = best score achieved matching Word[..W] against Pat[..P]. +// Unlike other tables, indices range from 0 to N *inclusive* +// Matched = whether we chose to match Word[W] with Pat[P] or not. +// +// Points are mostly assigned to matched characters, with 1 being a good score +// and 3 being a great one. So we treat the score range as [0, 3 * PatN]. +// This range is not strict: we can apply larger bonuses/penalties, or penalize +// non-matched characters. +void FuzzyMatcher::buildGraph() { + for (int W = 0; W < WordN; ++W) { + Scores[0][W + 1][Miss] = {Scores[0][W][Miss].Score - skipPenalty(W, Miss), + Miss}; + Scores[0][W + 1][Match] = {AwfulScore, Miss}; + } + for (int P = 0; P < PatN; ++P) { + for (int W = P; W < WordN; ++W) { + auto &Score = Scores[P + 1][W + 1], &PreMiss = Scores[P + 1][W]; + + auto MatchMissScore = PreMiss[Match].Score; + auto MissMissScore = PreMiss[Miss].Score; + if (P < PatN - 1) { // Skipping trailing characters is always free. + MatchMissScore -= skipPenalty(W, Match); + MissMissScore -= skipPenalty(W, Miss); + } + Score[Miss] = (MatchMissScore > MissMissScore) + ? ScoreInfo{MatchMissScore, Match} + : ScoreInfo{MissMissScore, Miss}; + + if (LowPat[P] != LowWord[W]) { // No match possible. + Score[Match] = {AwfulScore, Miss}; + } else { + auto &PreMatch = Scores[P][W]; + auto MatchMatchScore = PreMatch[Match].Score + matchBonus(P, W, Match); + auto MissMatchScore = PreMatch[Miss].Score + matchBonus(P, W, Miss); + Score[Match] = (MatchMatchScore > MissMatchScore) + ? ScoreInfo{MatchMatchScore, Match} + : ScoreInfo{MissMatchScore, Miss}; + } + } + } +} + +int FuzzyMatcher::skipPenalty(int W, Action Last) { + int S = 0; + if (WordRole[W] == Head) // Skipping a segment. + S += 1; + if (Last == Match) // Non-consecutive match. + S += 2; // We'd rather skip a segment than split our match. + return S; +} + +int FuzzyMatcher::matchBonus(int P, int W, Action Last) { + assert(LowPat[P] == LowWord[W]); + int S = 1; + // Bonus: pattern so far is a (case-insensitive) prefix of the word. + if (P == W) // We can't skip pattern characters, so we must have matched all. + ++S; + // Bonus: case matches, or a Head in the pattern aligns with one in the word. + if ((Pat[P] == Word[W] && (CaseSensitive || P == W)) || + (PatRole[P] == Head && WordRole[W] == Head)) + ++S; + // Penalty: matching inside a segment (and previous char wasn't matched). + if (WordRole[W] == Tail && P && Last == Miss) + S -= 3; + // Penalty: a Head in the pattern matches in the middle of a word segment. + if (PatRole[P] == Head && WordRole[W] == Tail) + --S; + // Penalty: matching the first pattern character in the middle of a segment. + if (P == 0 && WordRole[W] == Tail) + S -= 4; + assert(S <= PerfectBonus); + return S; +} + +llvm::SmallString<256> FuzzyMatcher::dumpLast(llvm::raw_ostream &OS) const { + llvm::SmallString<256> Result; + OS << "=== Match \"" << StringRef(Word, WordN) << "\" against [" + << StringRef(Pat, PatN) << "] ===\n"; + if (!WordContainsPattern) { + OS << "Substring check failed.\n"; + return Result; + } else if (isAwful(std::max(Scores[PatN][WordN][Match].Score, + Scores[PatN][WordN][Miss].Score))) { + OS << "Substring check passed, but all matches are forbidden\n"; + } + if (!CaseSensitive) + OS << "Lowercase query, so scoring ignores case\n"; + + // Traverse Matched table backwards to reconstruct the Pattern/Word mapping. + // The Score table has cumulative scores, subtracting along this path gives + // us the per-letter scores. + Action Last = + (Scores[PatN][WordN][Match].Score > Scores[PatN][WordN][Miss].Score) + ? Match + : Miss; + int S[MaxWord]; + Action A[MaxWord]; + for (int W = WordN - 1, P = PatN - 1; W >= 0; --W) { + A[W] = Last; + const auto &Cell = Scores[P + 1][W + 1][Last]; + if (Last == Match) + --P; + const auto &Prev = Scores[P + 1][W][Cell.Prev]; + S[W] = Cell.Score - Prev.Score; + Last = Cell.Prev; + } + for (int I = 0; I < WordN; ++I) { + if (A[I] == Match && (I == 0 || A[I - 1] == Miss)) + Result.push_back('['); + if (A[I] == Miss && I > 0 && A[I - 1] == Match) + Result.push_back(']'); + Result.push_back(Word[I]); + } + if (A[WordN - 1] == Match) + Result.push_back(']'); + + for (char C : StringRef(Word, WordN)) + OS << " " << C << " "; + OS << "\n"; + for (int I = 0, J = 0; I < WordN; I++) + OS << " " << (A[I] == Match ? Pat[J++] : ' ') << " "; + OS << "\n"; + for (int I = 0; I < WordN; I++) + OS << format("%2d ", S[I]); + OS << "\n"; + + OS << "\nSegmentation:"; + OS << "\n'" << StringRef(Word, WordN) << "'\n "; + for (int I = 0; I < WordN; ++I) + OS << "?-+ "[static_cast(WordRole[I])]; + OS << "\n[" << StringRef(Pat, PatN) << "]\n "; + for (int I = 0; I < PatN; ++I) + OS << "?-+ "[static_cast(PatRole[I])]; + OS << "\n"; + + OS << "\nScoring table (last-Miss, last-Match):\n"; + OS << " | "; + for (char C : StringRef(Word, WordN)) + OS << " " << C << " "; + OS << "\n"; + OS << "-+----" << std::string(WordN * 4, '-') << "\n"; + for (int I = 0; I <= PatN; ++I) { + for (Action A : {Miss, Match}) { + OS << ((I && A == Miss) ? Pat[I - 1] : ' ') << "|"; + for (int J = 0; J <= WordN; ++J) { + if (!isAwful(Scores[I][J][A].Score)) + OS << format("%3d%c", Scores[I][J][A].Score, + Scores[I][J][A].Prev == Match ? '*' : ' '); + else + OS << " "; + } + OS << "\n"; + } + } + + return Result; +} Index: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt +++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt @@ -10,6 +10,7 @@ add_extra_unittest(ClangdTests ClangdTests.cpp + FuzzyMatchTests.cpp JSONExprTests.cpp TraceTests.cpp ) Index: clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp =================================================================== --- clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/FuzzyMatchTests.cpp @@ -0,0 +1,252 @@ +//===-- FuzzyMatchTests.cpp - String fuzzy matcher tests --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "FuzzyMatch.h" + +#include "llvm/ADT/StringExtras.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { +namespace { +using namespace llvm; +using testing::Not; + +struct ExpectedMatch { + ExpectedMatch(StringRef Annotated) : Word(Annotated), Annotated(Annotated) { + for (char C : "[]") + Word.erase(std::remove(Word.begin(), Word.end(), C), Word.end()); + } + std::string Word; + StringRef Annotated; +}; +raw_ostream &operator<<(raw_ostream &OS, const ExpectedMatch &M) { + return OS << "'" << M.Word << "' as " << M.Annotated; +} + +struct MatchesMatcher : public testing::MatcherInterface { + ExpectedMatch Candidate; + MatchesMatcher(ExpectedMatch Candidate) : Candidate(std::move(Candidate)) {} + + void DescribeTo(::std::ostream *OS) const override { + raw_os_ostream(*OS) << "Matches " << Candidate; + } + + bool MatchAndExplain(StringRef Pattern, + testing::MatchResultListener *L) const override { + std::unique_ptr OS( + L->stream() ? (raw_ostream *)(new raw_os_ostream(*L->stream())) + : new raw_null_ostream()); + FuzzyMatcher Matcher(Pattern); + auto Result = Matcher.match(Candidate.Word); + auto AnnotatedMatch = Matcher.dumpLast(*OS << "\n"); + return Result && AnnotatedMatch == Candidate.Annotated; + } +}; + +// Accepts patterns that match a given word. +// Dumps the debug tables on match failure. +testing::Matcher matches(StringRef M) { + return testing::MakeMatcher(new MatchesMatcher(M)); +} + +TEST(FuzzyMatch, Matches) { + EXPECT_THAT("u_p", matches("[u]nique[_p]tr")); + EXPECT_THAT("up", matches("[u]nique_[p]tr")); + EXPECT_THAT("uq", matches("[u]ni[q]ue_ptr")); + EXPECT_THAT("qp", Not(matches("unique_ptr"))); + EXPECT_THAT("log", Not(matches("SVGFEMorphologyElement"))); + + EXPECT_THAT("tit", matches("win.[tit]")); + EXPECT_THAT("title", matches("win.[title]")); + EXPECT_THAT("WordCla", matches("[Word]Character[Cla]ssifier")); + EXPECT_THAT("WordCCla", matches("[WordC]haracter[Cla]ssifier")); + + EXPECT_THAT("dete", Not(matches("editor.quickSuggestionsDelay"))); + + EXPECT_THAT("highlight", matches("editorHover[Highlight]")); + EXPECT_THAT("hhighlight", matches("editor[H]over[Highlight]")); + EXPECT_THAT("dhhighlight", Not(matches("editorHoverHighlight"))); + + EXPECT_THAT("-moz", matches("[-moz]-foo")); + EXPECT_THAT("moz", matches("-[moz]-foo")); + EXPECT_THAT("moza", matches("-[moz]-[a]nimation")); + + EXPECT_THAT("ab", matches("[ab]A")); + EXPECT_THAT("ccm", matches("[c]a[cm]elCase")); + EXPECT_THAT("bti", Not(matches("the_black_knight"))); + EXPECT_THAT("ccm", Not(matches("camelCase"))); + EXPECT_THAT("cmcm", Not(matches("camelCase"))); + EXPECT_THAT("BK", matches("the_[b]lack_[k]night")); + EXPECT_THAT("KeyboardLayout=", Not(matches("KeyboardLayout"))); + EXPECT_THAT("LLL", matches("SVisual[L]ogger[L]ogs[L]ist")); + EXPECT_THAT("LLLL", Not(matches("SVilLoLosLi"))); + EXPECT_THAT("LLLL", Not(matches("SVisualLoggerLogsList"))); + EXPECT_THAT("TEdit", matches("[T]ext[Edit]")); + EXPECT_THAT("TEdit", matches("[T]ext[Edit]or")); + EXPECT_THAT("TEdit", matches("[Te]xte[dit]")); + EXPECT_THAT("TEdit", matches("[t]ext_[edit]")); + EXPECT_THAT("TEditDit", matches("[T]ext[Edit]or[D]ecorat[i]on[T]ype")); + EXPECT_THAT("TEdit", matches("[T]ext[Edit]orDecorationType")); + EXPECT_THAT("Tedit", matches("[T]ext[Edit]")); + EXPECT_THAT("ba", Not(matches("?AB?"))); + EXPECT_THAT("bkn", matches("the_[b]lack_[kn]ight")); + EXPECT_THAT("bt", matches("the_[b]lack_knigh[t]")); + EXPECT_THAT("ccm", matches("[c]amelCase[cm]")); + EXPECT_THAT("fdm", matches("[f]in[dM]odel")); + EXPECT_THAT("fob", matches("[fo]o[b]ar")); + EXPECT_THAT("fobz", Not(matches("foobar"))); + EXPECT_THAT("foobar", matches("[foobar]")); + EXPECT_THAT("form", matches("editor.[form]atOnSave")); + EXPECT_THAT("g p", matches("[G]it:[ P]ull")); + EXPECT_THAT("g p", matches("[G]it:[ P]ull")); + EXPECT_THAT("gip", matches("[Gi]t: [P]ull")); + EXPECT_THAT("gip", matches("[Gi]t: [P]ull")); + EXPECT_THAT("gp", matches("[G]it: [P]ull")); + EXPECT_THAT("gp", matches("[G]it_Git_[P]ull")); + EXPECT_THAT("is", matches("[I]mport[S]tatement")); + EXPECT_THAT("is", matches("[is]Valid")); + EXPECT_THAT("lowrd", matches("[low]Wo[rd]")); + EXPECT_THAT("myvable", matches("[myva]ria[ble]")); + EXPECT_THAT("no", Not(matches(""))); + EXPECT_THAT("no", Not(matches("match"))); + EXPECT_THAT("ob", Not(matches("foobar"))); + EXPECT_THAT("sl", matches("[S]Visual[L]oggerLogsList")); + EXPECT_THAT("sllll", matches("[S]Visua[lL]ogger[L]ogs[L]ist")); + EXPECT_THAT("Three", matches("H[T]ML[HRE]l[e]ment")); + EXPECT_THAT("Three", matches("[Three]")); + EXPECT_THAT("fo", Not(matches("barfoo"))); + EXPECT_THAT("fo", matches("bar_[fo]o")); + EXPECT_THAT("fo", matches("bar_[Fo]o")); + EXPECT_THAT("fo", matches("bar [fo]o")); + EXPECT_THAT("fo", matches("bar.[fo]o")); + EXPECT_THAT("fo", matches("bar/[fo]o")); + EXPECT_THAT("fo", matches("bar\\[fo]o")); + + EXPECT_THAT( + "aaaaaa", + matches("[aaaaaa]aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")); + EXPECT_THAT("baba", Not(matches("ababababab"))); + EXPECT_THAT("fsfsfs", Not(matches("dsafdsafdsafdsafdsafdsafdsafasdfdsa"))); + EXPECT_THAT("fsfsfsfsfsfsfsf", + Not(matches("dsafdsafdsafdsafdsafdsafdsafasdfdsafdsafdsafdsafdsfd" + "safdsfdfdfasdnfdsajfndsjnafjndsajlknfdsa"))); + + EXPECT_THAT(" g", matches("[ g]roup")); + EXPECT_THAT("g", matches(" [g]roup")); + EXPECT_THAT("g g", Not(matches(" groupGroup"))); + EXPECT_THAT("g g", matches(" [g]roup[ G]roup")); + EXPECT_THAT(" g g", matches("[ ] [g]roup[ G]roup")); + EXPECT_THAT("zz", matches("[zz]Group")); + EXPECT_THAT("zzg", matches("[zzG]roup")); + EXPECT_THAT("g", matches("zz[G]roup")); + + EXPECT_THAT("aaaa", matches("_a_[aaaa]")); // Prefer consecutive. + EXPECT_THAT("printf", matches("s[printf]")); + EXPECT_THAT("str", matches("o[str]eam")); +} + +struct RankMatcher : public testing::MatcherInterface { + std::vector RankedStrings; + RankMatcher(std::initializer_list RankedStrings) + : RankedStrings(RankedStrings) {} + + void DescribeTo(::std::ostream *OS) const override { + raw_os_ostream O(*OS); + O << "Ranks strings in order: ["; + for (const auto &Str : RankedStrings) + O << "\n\t" << Str; + O << "\n]"; + } + + bool MatchAndExplain(StringRef Pattern, + testing::MatchResultListener *L) const override { + std::unique_ptr OS( + L->stream() ? (raw_ostream *)(new raw_os_ostream(*L->stream())) + : new raw_null_ostream()); + FuzzyMatcher Matcher(Pattern); + const ExpectedMatch *LastMatch; + Optional LastScore; + bool Ok = true; + for (const auto &Str : RankedStrings) { + auto Score = Matcher.match(Str.Word); + if (!Score) { + *OS << "\nDoesn't match '" << Str.Word << "'"; + Matcher.dumpLast(*OS << "\n"); + Ok = false; + } else { + std::string Buf; + llvm::raw_string_ostream Info(Buf); + auto AnnotatedMatch = Matcher.dumpLast(Info); + + if (AnnotatedMatch != Str.Annotated) { + *OS << "\nMatched " << Str.Word << " as " << AnnotatedMatch + << " instead of " << Str.Annotated << "\n" + << Info.str(); + Ok = false; + } else if (LastScore && *LastScore < *Score) { + *OS << "\nRanks '" << Str.Word << "'=" << *Score << " above '" + << LastMatch->Word << "'=" << *LastScore << "\n" + << Info.str(); + Matcher.match(LastMatch->Word); + Matcher.dumpLast(*OS << "\n"); + Ok = false; + } + } + LastMatch = &Str; + LastScore = Score; + } + return Ok; + } +}; + +// Accepts patterns that match all the strings and rank them in the given order. +// Dumps the debug tables on match failure. +template testing::Matcher ranks(T... RankedStrings) { + return testing::MakeMatcher( + new RankMatcher{ExpectedMatch(RankedStrings)...}); +} + +TEST(FuzzyMatch, Ranking) { + EXPECT_THAT("eb", ranks("[e]mplace_[b]ack", "[e]m[b]ed")); + EXPECT_THAT("cons", + ranks("[cons]ole", "[Cons]ole", "ArrayBuffer[Cons]tructor")); + EXPECT_THAT("foo", ranks("[foo]", "[Foo]")); + EXPECT_THAT("onMess", + ranks("[onMess]age", "[onmess]age", "[on]This[M]ega[Es]cape[s]")); + EXPECT_THAT("CC", ranks("[C]amel[C]ase", "[c]amel[C]ase")); + EXPECT_THAT("cC", ranks("[c]amel[C]ase", "[C]amel[C]ase")); + EXPECT_THAT("p", ranks("[p]arse", "[p]osix", "[p]afdsa", "[p]ath", "[p]")); + EXPECT_THAT("pa", ranks("[pa]rse", "[pa]th", "[pa]fdsa")); + EXPECT_THAT("log", ranks("[log]", "Scroll[Log]icalPosition")); + EXPECT_THAT("e", ranks("[e]lse", "Abstract[E]lement")); + EXPECT_THAT("workbench.sideb", + ranks("[workbench.sideB]ar.location", + "[workbench.]editor.default[SideB]ySideLayout")); + EXPECT_THAT("editor.r", ranks("[editor.r]enderControlCharacter", + "[editor.]overview[R]ulerlanes", + "diff[Editor.r]enderSideBySide")); + EXPECT_THAT("-mo", ranks("[-mo]z-columns", "[-]ms-ime-[mo]de")); + EXPECT_THAT("convertModelPosition", + ranks("[convertModelPosition]ToViewPosition", + "[convert]ViewTo[ModelPosition]")); + EXPECT_THAT("is", ranks("[is]ValidViewletId", "[i]mport [s]tatement")); + EXPECT_THAT("title", ranks("window.[title]", + "files.[t]r[i]m[T]rai[l]ingWhit[e]space")); + EXPECT_THAT("strcpy", ranks("[strcpy]", "[strcpy]_s", "[str]n[cpy]")); + EXPECT_THAT("close", ranks("workbench.quickOpen.[close]OnFocusOut", + "[c]ss.[l]int.imp[o]rt[S]tat[e]ment", + "[c]ss.co[lo]rDecorator[s].[e]nable")); +} + +} // namespace +} // namespace clangd +} // namespace clang