Index: lib/Support/TrigramIndex.cpp
===================================================================
--- lib/Support/TrigramIndex.cpp
+++ lib/Support/TrigramIndex.cpp
@@ -26,28 +26,44 @@
 
 static const char RegexAdvancedMetachars[] = "()^$|+?[]\\{}";
 
-static bool isSimpleWildcard(StringRef Str) {
-  // Check for regex metacharacters other than '*' and '.'.
-  return Str.find_first_of(RegexAdvancedMetachars) == StringRef::npos;
+/// Returns true if \p Char is a regex metacharacter other than '*' and '.'
+/// (strchr also matches the terminating NUL, so a 0 byte counts as well).
+static bool isAdvancedMetachar(unsigned Char) {
+  return strchr(RegexAdvancedMetachars, Char) != nullptr;
 }
 
 void TrigramIndex::insert(std::string Regex) {
   if (Defeated) return;
-  if (!isSimpleWildcard(Regex)) {
-    Defeated = true;
-    return;
-  }
-
   std::set<unsigned> Was;
   unsigned Cnt = 0;
   unsigned Tri = 0;
   unsigned Len = 0;
+  bool Escaped = false;
   for (unsigned Char : Regex) {
-    if (Char == '.' || Char == '*') {
-      Tri = 0;
-      Len = 0;
-      continue;
+    if (!Escaped) {
+      // Regular expressions allow escaping a symbol by preceding it with '\'.
+      if (Char == '\\') {
+        Escaped = true;
+        continue;
+      }
+      if (isAdvancedMetachar(Char)) {
+        // This is a more complicated regex than we can handle here.
+        Defeated = true;
+        return;
+      }
+      if (Char == '.' || Char == '*') {
+        Tri = 0;
+        Len = 0;
+        continue;
+      }
+    }
+    if (Escaped && Char >= '1' && Char <= '9') {
+      // "\1".."\9" are backreferences, which the index cannot handle.
+      Defeated = true;
+      return;
     }
+    // We have already handled escaping and can reset the flag.
+    Escaped = false;
     Tri = ((Tri << 8) + Char) & 0xFFFFFF;
     Len++;
     if (Len < 3)
Index: unittests/Support/SpecialCaseListTest.cpp
===================================================================
--- unittests/Support/SpecialCaseListTest.cpp
+++ unittests/Support/SpecialCaseListTest.cpp
@@ -178,4 +178,15 @@
   EXPECT_TRUE(SCL->inSection("fun", "aaaabbbaaa"));
 }
 
+TEST_F(SpecialCaseListTest, EscapedSymbols) {
+  std::unique_ptr<SpecialCaseList> SCL = makeSpecialCaseList("src:*c\\+\\+abi*\n"
+                                                             "src:*hello\\\\world*\n");
+  EXPECT_TRUE(SCL->inSection("src", "dir/c++abi"));
+  EXPECT_FALSE(SCL->inSection("src", "dir/c\\+\\+abi"));
+  EXPECT_FALSE(SCL->inSection("src", "c\\+\\+abi"));
+  EXPECT_TRUE(SCL->inSection("src", "C:\\hello\\world"));
+  EXPECT_TRUE(SCL->inSection("src", "hello\\world"));
+  EXPECT_FALSE(SCL->inSection("src", "hello\\\\world"));
+}
+
 }
Index: unittests/Support/TrigramIndexTest.cpp
===================================================================
--- unittests/Support/TrigramIndexTest.cpp
+++ unittests/Support/TrigramIndexTest.cpp
@@ -94,9 +94,29 @@
   EXPECT_TRUE(TI->isDefeated());
 }
 
-TEST_F(TrigramIndexTest, SpecialSymbol) {
+TEST_F(TrigramIndexTest, EscapedSymbols) {
   std::unique_ptr<TrigramIndex> TI =
-    makeTrigramIndex({"*c\\+\\+*"});
+    makeTrigramIndex({"*c\\+\\+*", "*hello\\\\world*", "a\\tb", "a\\0b"});
+  EXPECT_FALSE(TI->isDefeated());
+  EXPECT_FALSE(TI->isDefinitelyOut("c++"));
+  EXPECT_TRUE(TI->isDefinitelyOut("c\\+\\+"));
+  EXPECT_FALSE(TI->isDefinitelyOut("hello\\world"));
+  EXPECT_TRUE(TI->isDefinitelyOut("hello\\\\world"));
+  EXPECT_FALSE(TI->isDefinitelyOut("atb"));
+  EXPECT_TRUE(TI->isDefinitelyOut("a\\tb"));
+  EXPECT_TRUE(TI->isDefinitelyOut("a\tb"));
+  EXPECT_FALSE(TI->isDefinitelyOut("a0b"));
+}
+
+TEST_F(TrigramIndexTest, Backreference1) {
+  std::unique_ptr<TrigramIndex> TI =
+    makeTrigramIndex({"*foo\\1*"});
+  EXPECT_TRUE(TI->isDefeated());
+}
+
+TEST_F(TrigramIndexTest, Backreference2) {
+  std::unique_ptr<TrigramIndex> TI =
+    makeTrigramIndex({"*foo\\2*"});
   EXPECT_TRUE(TI->isDefeated());
 }
 