diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -21,7 +21,7 @@ #include // This class represents a glob pattern. Supported metacharacters -// are "*", "?", "[]" and "[^]". +// are "*", "?", "\", "[]", "[^]", and "[!]". namespace llvm { class BitVector; template class ArrayRef; diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -19,7 +19,7 @@ using namespace llvm; static bool hasWildcard(StringRef S) { - return S.find_first_of("?*[") != StringRef::npos; + return S.find_first_of("?*[\\") != StringRef::npos; } // Expands character ranges and returns a bitmap. @@ -60,8 +60,9 @@ } // This is a scanner for the glob pattern. -// A glob pattern token is one of "*", "?", "[]", "[^]" -// (which is a negative form of "[]"), or a non-meta character. +// A glob pattern token is one of "*", "?", "\", "[]", "[^]" +// (which is a negative form of "[]"), "[!]" (which is +// equivalent to "[^]"), or a non-meta character. // This function returns the first token in S. static Expected scan(StringRef &S, StringRef Original) { switch (S[0]) { @@ -74,14 +75,16 @@ S = S.substr(1); return BitVector(256, true); case '[': { - size_t End = S.find(']', 1); + // ']' is allowed as the first character of a character class. '[]' is + // invalid. So, just skip the first character. 
+ size_t End = S.find(']', 2); if (End == StringRef::npos) return make_error("invalid glob pattern: " + Original, errc::invalid_argument); StringRef Chars = S.substr(1, End - 1); S = S.substr(End + 1); - if (Chars.startswith("^")) { + if (Chars.startswith("^") || Chars.startswith("!")) { Expected BV = expand(Chars.substr(1), Original); if (!BV) return BV.takeError(); @@ -89,6 +92,11 @@ } return expand(Chars, Original); } + case '\\': + // Eat this character and fall through below to treat it like a non-meta + // character. + S = S.substr(1); + LLVM_FALLTHROUGH; default: BitVector BV(256, false); BV[(uint8_t)S[0]] = true; @@ -107,8 +115,9 @@ return Pat; } - // S is something like "foo*". We can use startswith(). - if (S.endswith("*") && !hasWildcard(S.drop_back())) { + // S is something like "foo*", and the "*" is not escaped. We can use + // startswith(). + if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { Pat.Prefix = S.drop_back(); return Pat; } diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp --- a/llvm/unittests/Support/GlobPatternTest.cpp +++ b/llvm/unittests/Support/GlobPatternTest.cpp @@ -14,57 +14,115 @@ class GlobPatternTest : public ::testing::Test {}; -TEST_F(GlobPatternTest, Basics) { +TEST_F(GlobPatternTest, Empty) { Expected Pat1 = GlobPattern::create(""); EXPECT_TRUE((bool)Pat1); EXPECT_TRUE(Pat1->match("")); EXPECT_FALSE(Pat1->match("a")); +} + +TEST_F(GlobPatternTest, Glob) { + Expected Pat1 = GlobPattern::create("ab*c*def"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("abcdef")); + EXPECT_TRUE(Pat1->match("abxcxdef")); + EXPECT_FALSE(Pat1->match("")); + EXPECT_FALSE(Pat1->match("xabcdef")); + EXPECT_FALSE(Pat1->match("abcdefx")); +} + +TEST_F(GlobPatternTest, Wildcard) { + Expected Pat1 = GlobPattern::create("a??c"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("axxc")); + EXPECT_FALSE(Pat1->match("axxx")); + EXPECT_FALSE(Pat1->match("")); +} - Expected 
Pat2 = GlobPattern::create("ab*c*def"); +TEST_F(GlobPatternTest, Escape) { + Expected Pat1 = GlobPattern::create("\\*"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("*")); + EXPECT_FALSE(Pat1->match("\\*")); + EXPECT_FALSE(Pat1->match("a")); + + Expected Pat2 = GlobPattern::create("a?\\?c"); EXPECT_TRUE((bool)Pat2); - EXPECT_TRUE(Pat2->match("abcdef")); - EXPECT_TRUE(Pat2->match("abxcxdef")); + EXPECT_TRUE(Pat2->match("ax?c")); + EXPECT_FALSE(Pat2->match("axxc")); EXPECT_FALSE(Pat2->match("")); - EXPECT_FALSE(Pat2->match("xabcdef")); - EXPECT_FALSE(Pat2->match("abcdefx")); - - Expected Pat3 = GlobPattern::create("a??c"); - EXPECT_TRUE((bool)Pat3); - EXPECT_TRUE(Pat3->match("axxc")); - EXPECT_FALSE(Pat3->match("axxx")); - EXPECT_FALSE(Pat3->match("")); - - Expected Pat4 = GlobPattern::create("[abc-fy-z]"); - EXPECT_TRUE((bool)Pat4); - EXPECT_TRUE(Pat4->match("a")); - EXPECT_TRUE(Pat4->match("b")); - EXPECT_TRUE(Pat4->match("c")); - EXPECT_TRUE(Pat4->match("d")); - EXPECT_TRUE(Pat4->match("e")); - EXPECT_TRUE(Pat4->match("f")); - EXPECT_TRUE(Pat4->match("y")); - EXPECT_TRUE(Pat4->match("z")); - EXPECT_FALSE(Pat4->match("g")); - EXPECT_FALSE(Pat4->match("")); - - Expected Pat5 = GlobPattern::create("[^abc-fy-z]"); - EXPECT_TRUE((bool)Pat5); - EXPECT_TRUE(Pat5->match("g")); - EXPECT_FALSE(Pat5->match("a")); - EXPECT_FALSE(Pat5->match("b")); - EXPECT_FALSE(Pat5->match("c")); - EXPECT_FALSE(Pat5->match("d")); - EXPECT_FALSE(Pat5->match("e")); - EXPECT_FALSE(Pat5->match("f")); - EXPECT_FALSE(Pat5->match("y")); - EXPECT_FALSE(Pat5->match("z")); - EXPECT_FALSE(Pat5->match("")); +} + +TEST_F(GlobPatternTest, BasicCharacterClass) { + Expected Pat1 = GlobPattern::create("[abc-fy-z]"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("a")); + EXPECT_TRUE(Pat1->match("b")); + EXPECT_TRUE(Pat1->match("c")); + EXPECT_TRUE(Pat1->match("d")); + EXPECT_TRUE(Pat1->match("e")); + EXPECT_TRUE(Pat1->match("f")); + EXPECT_TRUE(Pat1->match("y")); + 
EXPECT_TRUE(Pat1->match("z")); + EXPECT_FALSE(Pat1->match("g")); + EXPECT_FALSE(Pat1->match("")); +} + +TEST_F(GlobPatternTest, NegatedCharacterClass) { + Expected Pat1 = GlobPattern::create("[^abc-fy-z]"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("g")); + EXPECT_FALSE(Pat1->match("a")); + EXPECT_FALSE(Pat1->match("b")); + EXPECT_FALSE(Pat1->match("c")); + EXPECT_FALSE(Pat1->match("d")); + EXPECT_FALSE(Pat1->match("e")); + EXPECT_FALSE(Pat1->match("f")); + EXPECT_FALSE(Pat1->match("y")); + EXPECT_FALSE(Pat1->match("z")); + EXPECT_FALSE(Pat1->match("")); + + Expected Pat2 = GlobPattern::create("[!abc-fy-z]"); + EXPECT_TRUE((bool)Pat2); + EXPECT_TRUE(Pat2->match("g")); + EXPECT_FALSE(Pat2->match("a")); + EXPECT_FALSE(Pat2->match("b")); + EXPECT_FALSE(Pat2->match("c")); + EXPECT_FALSE(Pat2->match("d")); + EXPECT_FALSE(Pat2->match("e")); + EXPECT_FALSE(Pat2->match("f")); + EXPECT_FALSE(Pat2->match("y")); + EXPECT_FALSE(Pat2->match("z")); + EXPECT_FALSE(Pat2->match("")); +} + +TEST_F(GlobPatternTest, BracketFrontOfCharacterClass) { + Expected Pat1 = GlobPattern::create("[]a]x"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("]x")); + EXPECT_TRUE(Pat1->match("ax")); + EXPECT_FALSE(Pat1->match("a]x")); + EXPECT_FALSE(Pat1->match("")); +} + +TEST_F(GlobPatternTest, SpecialCharsInCharacterClass) { + Expected Pat1 = GlobPattern::create("[*?^]"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("*")); + EXPECT_TRUE(Pat1->match("?")); + EXPECT_TRUE(Pat1->match("^")); + EXPECT_FALSE(Pat1->match("*?^")); + EXPECT_FALSE(Pat1->match("")); } TEST_F(GlobPatternTest, Invalid) { Expected Pat1 = GlobPattern::create("["); EXPECT_FALSE((bool)Pat1); handleAllErrors(Pat1.takeError(), [&](ErrorInfoBase &EIB) {}); + + Expected Pat2 = GlobPattern::create("[]"); + EXPECT_FALSE((bool)Pat2); + handleAllErrors(Pat2.takeError(), [&](ErrorInfoBase &EIB) {}); } TEST_F(GlobPatternTest, ExtSym) {