Index: include/llvm/Support/Regex.h =================================================================== --- include/llvm/Support/Regex.h +++ include/llvm/Support/Regex.h @@ -47,7 +47,12 @@ /// /// \param Regex - referenced string is no longer needed after this /// constructor does finish. Only its compiled form is kept stored. - Regex(StringRef Regex, unsigned Flags = NoFlags); + /// + /// \param Error - If non-null, any errors in regex compilation will be + /// recorded as a non-empty string. If there is no error, it will be an + /// empty string. + Regex(StringRef Regex, unsigned Flags = NoFlags, + std::string *Error = nullptr); Regex(const Regex &) = delete; Regex &operator=(Regex regex) { std::swap(preg, regex.preg); @@ -80,6 +85,23 @@ bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = nullptr, std::string *Error = nullptr) const; + /// Syntactical sugar to create a temporary Regex and call match() on it. + /// + /// Assuming no regex compilation errors, equivalent to the following: + /// + /// Regex(RegexPattern, Flags, Error).match(String, Matches, Error) + /// + /// However, unlike the above, this doesn't call match() if the constructor + /// reports an error. + /// + /// This returns true only when both the regex is valid and the match is + /// also successful. If \p Error is non-null, it will be set to a non-empty + /// string if the regex is invalid or if an error happened during match(). + /// If there is no error, it will be an empty string. + static bool match(StringRef RegexPattern, StringRef String, + SmallVectorImpl<StringRef> *Matches = nullptr, + unsigned Flags = NoFlags, std::string *Error = nullptr); + /// sub - Return the result of replacing the first match of the regex in /// \p String with the \p Repl string. Backreferences like "\0" in the /// replacement string are replaced with the appropriate match substring. 
@@ -93,6 +115,22 @@ std::string sub(StringRef Repl, StringRef String, std::string *Error = nullptr) const; + /// Syntactical sugar to create a temporary Regex and call sub() on it. + /// + /// Assuming no regex compilation errors, equivalent to the following: + /// + /// Regex(RegexPattern, Flags, Error).sub(Repl, String, Error) + /// + /// However, unlike the above, this doesn't call sub() if the constructor + /// reports an error. + /// + /// If \p Error is non-null, it will be set to a non-empty string if the + /// regex is invalid or if an error happened during sub(). If there is no + /// error, it will be an empty string. + static std::string sub(StringRef RegexPattern, StringRef Repl, + StringRef String, unsigned Flags = NoFlags, + std::string *Error = nullptr); + /// If this function returns true, ^Str$ is an extended regular /// expression that matches Str and only Str. static bool isLiteralERE(StringRef Str); Index: lib/Support/FileCheck.cpp =================================================================== --- lib/Support/FileCheck.cpp +++ lib/Support/FileCheck.cpp @@ -656,7 +656,7 @@ } SmallVector MatchInfo; - if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) + if (!Regex::match(RegExToMatch, Buffer, &MatchInfo, Regex::Newline)) return make_error(); // Successful regex match. Index: lib/Support/Regex.cpp =================================================================== --- lib/Support/Regex.cpp +++ lib/Support/Regex.cpp @@ -23,9 +23,22 @@ using namespace llvm; +namespace { + +/// Utility to convert a regex error code into a human-readable string. 
+void RegexErrorToString(int error, struct llvm_regex *preg, + std::string &Error) { + size_t len = llvm_regerror(error, preg, nullptr, 0); + + Error.resize(len - 1); + llvm_regerror(error, preg, &Error[0], len); +} + +} // namespace + Regex::Regex() : preg(nullptr), error(REG_BADPAT) {} -Regex::Regex(StringRef regex, unsigned Flags) { +Regex::Regex(StringRef regex, unsigned Flags, std::string *Error) { unsigned flags = 0; preg = new llvm_regex(); preg->re_endp = regex.end(); @@ -36,6 +49,16 @@ if (!(Flags & BasicRegex)) flags |= REG_EXTENDED; error = llvm_regcomp(preg, regex.data(), flags|REG_PEND); + + // Log regex compilation error into Error string if it is available. + if (Error) { + if (error) { + RegexErrorToString(error, preg, *Error); + } else { + if (!Error->empty()) + Error->clear(); + } + } } Regex::Regex(Regex &&regex) { @@ -52,19 +75,6 @@ } } -namespace { - -/// Utility to convert a regex error code into a human-readable string. -void RegexErrorToString(int error, struct llvm_regex *preg, - std::string &Error) { - size_t len = llvm_regerror(error, preg, nullptr, 0); - - Error.resize(len - 1); - llvm_regerror(error, preg, &Error[0], len); -} - -} // namespace - bool Regex::isValid(std::string &Error) const { if (!error) return true; @@ -83,11 +93,9 @@ std::string *Error) const { // Reset error, if given. if (Error && !Error->empty()) - *Error = ""; + Error->clear(); - // Check if the regex itself didn't successfully compile. - if (Error ? !isValid(*Error) : !isValid()) - return false; + assert(isValid() && "Assuming the regex successfully compiled."); unsigned nmatch = Matches ? preg->re_nsub+1 : 0; @@ -129,6 +137,20 @@ return true; } +bool Regex::match(StringRef RegexPattern, StringRef String, + SmallVectorImpl<StringRef> *Matches, unsigned Flags, + std::string *Error) { + // Compile the single-use regex. + Regex TmpRegex(RegexPattern, Flags, Error); + + // Bail out if there were regex compile errors. 
+ if (!TmpRegex.isValid()) + return false; + + // Do the single-use match itself. + return TmpRegex.match(String, Matches, Error); +} + std::string Regex::sub(StringRef Repl, StringRef String, std::string *Error) const { SmallVector Matches; @@ -202,6 +224,19 @@ return Res; } +std::string Regex::sub(StringRef RegexPattern, StringRef Repl, StringRef String, + unsigned Flags, std::string *Error) { + // Compile the single-use regex. + Regex TmpRegex(RegexPattern, Flags, Error); + + // Bail out if there were regex compile errors. + if (!TmpRegex.isValid()) + return String; + + // Do the single-use sub itself. + return TmpRegex.sub(Repl, String, Error); +} + // These are the special characters matched in functions like "p_ere_exp". static const char RegexMetachars[] = "()^$|*+?.[]\\{}"; Index: lib/Transforms/Utils/SymbolRewriter.cpp =================================================================== --- lib/Transforms/Utils/SymbolRewriter.cpp +++ lib/Transforms/Utils/SymbolRewriter.cpp @@ -179,7 +179,8 @@ for (auto &C : (M.*Iterator)()) { std::string Error; - std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error); + std::string Name = + Regex::sub(Pattern, Transform, C.getName(), Regex::NoFlags, &Error); if (!Error.empty()) report_fatal_error("unable to transforn " + C.getName() + " in " + M.getModuleIdentifier() + ": " + Error); Index: tools/llvm-cov/CoverageFilters.cpp =================================================================== --- tools/llvm-cov/CoverageFilters.cpp +++ tools/llvm-cov/CoverageFilters.cpp @@ -26,11 +26,11 @@ bool NameRegexCoverageFilter::matches( const coverage::CoverageMapping &, const coverage::FunctionRecord &Function) const { - return llvm::Regex(Regex).match(Function.Name); + return llvm::Regex::match(Regex, Function.Name); } bool NameRegexCoverageFilter::matchesFilename(StringRef Filename) const { - return llvm::Regex(Regex).match(Filename); + return llvm::Regex::match(Regex, Filename); } bool 
NameWhitelistCoverageFilter::matches( Index: unittests/Support/RegexTest.cpp =================================================================== --- unittests/Support/RegexTest.cpp +++ unittests/Support/RegexTest.cpp @@ -17,6 +17,8 @@ class RegexTest : public ::testing::Test { }; +using RegexDeathTest = RegexTest; + TEST_F(RegexTest, Basics) { Regex r1("^[0-9]+$"); EXPECT_TRUE(r1.match("916")); @@ -139,6 +141,14 @@ EXPECT_EQ("invalid character range", Error); } +TEST_F(RegexTest, ConstructorError) { + std::string Error; + Regex r1("(foo", Regex::NoFlags, &Error); + EXPECT_EQ("parentheses not balanced", Error); + Regex r2("foo", Regex::NoFlags, &Error); + EXPECT_TRUE(Error.empty()); +} + TEST_F(RegexTest, MoveConstruct) { Regex r1("^[0-9]+$"); Regex r2(std::move(r1)); @@ -163,13 +173,18 @@ EXPECT_TRUE(r1.isValid(Error)); } -TEST_F(RegexTest, MatchInvalid) { - Regex r1; - std::string Error; - EXPECT_FALSE(r1.isValid(Error)); - EXPECT_FALSE(r1.match("X")); +// Death tests rely on assert which is disabled in release mode. 
+#ifndef NDEBUG + +TEST_F(RegexDeathTest, MatchInvalid) { + Regex r1; + std::string Error; + EXPECT_FALSE(r1.isValid(Error)); + EXPECT_DEATH(r1.match("X"), "Assuming the regex successfully compiled."); } +#endif // NDEBUG + // https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3727 TEST_F(RegexTest, OssFuzz3727Regression) { // Wrap in a StringRef so the NUL byte doesn't terminate the string @@ -178,4 +193,20 @@ EXPECT_FALSE(r.isValid(Error)); } +TEST_F(RegexTest, ConvenienceFunctions) { + std::string Error; + + // static Regex::match + EXPECT_FALSE(Regex::match("(foo", "foo", nullptr, Regex::NoFlags, &Error)); + EXPECT_EQ("parentheses not balanced", Error); + EXPECT_TRUE(Regex::match("^[0-9]+$", "916", nullptr, Regex::NoFlags, &Error)); + EXPECT_TRUE(Error.empty()); + + // static Regex::sub + EXPECT_EQ("aber", Regex::sub("a[b-", "d", "aber", Regex::NoFlags, &Error)); + EXPECT_EQ("invalid character range", Error); + EXPECT_EQ("NUM", Regex::sub("[0-9]+", "NUM", "1234", Regex::NoFlags, &Error)); + EXPECT_TRUE(Error.empty()); +} + }