Index: include/llvm/Support/Regex.h =================================================================== --- include/llvm/Support/Regex.h +++ include/llvm/Support/Regex.h @@ -17,6 +17,7 @@ #ifndef LLVM_SUPPORT_REGEX_H #define LLVM_SUPPORT_REGEX_H +#include "llvm/Support/Error.h" #include struct llvm_regex; @@ -25,6 +26,24 @@ class StringRef; template class SmallVectorImpl; + /// An error from constructing or evaluating a regex. + class RegexError : public ErrorInfo { + public: + static char ID; + + /// Constructs this error to describe \p EC (see regex_impl.h). + explicit RegexError(int EC); + + void log(raw_ostream &OS) const override { OS << message(); } + std::string message() const override; + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } + + private: + int EC; + }; + class Regex { public: enum { @@ -43,6 +62,14 @@ BasicRegex=4 }; + /// Compiles the given regular expression \p Regex and returns it, or a + /// RegexError if the pattern is invalid. + static Expected compile(StringRef Regex, unsigned Flags = NoFlags); + + /// Compiles the given regular expression \p Regex and returns it. + /// Causes a runtime error if the expression is not valid. + static Regex compileKnownValid(StringRef Regex, unsigned Flags = NoFlags); + Regex(); /// Compiles the given regular expression \p Regex. 
Regex(StringRef Regex, unsigned Flags = NoFlags); Index: include/llvm/Support/YAMLTraits.h =================================================================== --- include/llvm/Support/YAMLTraits.h +++ include/llvm/Support/YAMLTraits.h @@ -456,11 +456,11 @@ if (S.equals(".inf") || S.equals(".Inf") || S.equals(".INF")) return true; - Regex FloatMatcher("^(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$"); - if (FloatMatcher.match(S)) - return true; - - return false; + if (auto FloatMatcher = Regex::compile( + "^(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$")) + return FloatMatcher->match(S); + else + llvm_unreachable("invalid regex"); } inline bool isNumeric(StringRef S) { Index: lib/IR/AutoUpgrade.cpp =================================================================== --- lib/IR/AutoUpgrade.cpp +++ lib/IR/AutoUpgrade.cpp @@ -101,19 +101,21 @@ F->arg_begin()->getType()); return true; } - Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); + auto vldRegex = Regex::compileKnownValid( + "^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); if (vldRegex.match(Name)) { auto fArgs = F->getFunctionType()->params(); SmallVector Tys(fArgs.begin(), fArgs.end()); // Can't use Intrinsic::getDeclaration here as the return types might // then only be structurally equal. - FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); - NewFn = Function::Create(fType, F->getLinkage(), - "llvm." + Name + ".p0i8", F->getParent()); + FunctionType *fType = FunctionType::get(F->getReturnType(), Tys, false); + NewFn = Function::Create(fType, F->getLinkage(), "llvm." 
+ Name + ".p0i8", + F->getParent()); return true; } - Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); - if (vstRegex.match(Name)) { + auto vstRegex = Regex::compile( + "^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); + if (vstRegex && vstRegex->match(Name)) { static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2, Intrinsic::arm_neon_vst3, Index: lib/IR/DiagnosticInfo.cpp =================================================================== --- lib/IR/DiagnosticInfo.cpp +++ lib/IR/DiagnosticInfo.cpp @@ -42,14 +42,16 @@ void operator=(const std::string &Val) { // Create a regexp object to match pass names for emitOptimizationRemark. - if (!Val.empty()) { - Pattern = std::make_shared(Val); - std::string RegexError; - if (!Pattern->isValid(RegexError)) - report_fatal_error("Invalid regular expression '" + Val + - "' in -pass-remarks: " + RegexError, - false); - } + if (Val.empty()) + return; + + if (auto RegexOrError = Regex::compile(Val)) + Pattern = std::make_shared(std::move(*RegexOrError)); + else + report_fatal_error( + "Invalid regular expression '" + Val + + "' in -pass-remarks: " + toString(RegexOrError.takeError()), + false); } }; Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -146,7 +146,8 @@ using namespace llvm; -static Regex DefaultAliasRegex("^(default|lto-pre-link|lto)<(O[0123sz])>$"); +static Regex DefaultAliasRegex = ExitOnError("invalid regex literal")( + Regex::compile("^(default|lto-pre-link|lto)<(O[0123sz])>$")); static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { switch (Level) { Index: lib/Support/Regex.cpp =================================================================== --- lib/Support/Regex.cpp +++ lib/Support/Regex.cpp @@ -19,6 +19,37 @@ #include using namespace llvm; +char RegexError::ID; + +RegexError::RegexError(int EC) : EC(EC) {} + 
+std::string RegexError::message() const { + size_t len = llvm_regerror(EC, nullptr, nullptr, 0); + std::string Msg; + Msg.resize(len - 1); + llvm_regerror(EC, nullptr, &Msg[0], len); + return Msg; +} + +Expected<Regex> Regex::compile(StringRef regex, unsigned Flags) { + // An empty regex is a valid POSIX extended RE (POSIX 1003.2), but + // regcomp unconditionally reports REG_BADPAT. + if (regex.empty() && !(Flags&REG_BASIC)) + return Regex(); + Regex R(regex, Flags); + if (R.error) { + return make_error<RegexError>(R.error); + } + return std::move(R); +} + +Regex Regex::compileKnownValid(StringRef regex, unsigned Flags) { + if (auto regexp = compile(regex, Flags)) + return std::move(*regexp); + else + llvm_unreachable("bad regexp"); +} + Regex::Regex() : preg(nullptr), error(REG_BADPAT) {} Regex::Regex(StringRef regex, unsigned Flags) { @@ -51,11 +82,7 @@ bool Regex::isValid(std::string &Error) { if (!error) return true; - - size_t len = llvm_regerror(error, preg, nullptr, 0); - - Error.resize(len - 1); - llvm_regerror(error, preg, &Error[0], len); + Error = RegexError(error).message(); return false; } Index: lib/Support/SpecialCaseList.cpp =================================================================== --- lib/Support/SpecialCaseList.cpp +++ lib/Support/SpecialCaseList.cpp @@ -124,11 +124,13 @@ } // Check that the regexp is valid. 
- Regex CheckRE(Regexp); - std::string REError; - if (!CheckRE.isValid(REError)) { + Regex CheckRE; + if (auto RegexpOrError = Regex::compile(Regexp)) { + CheckRE = std::move(*RegexpOrError); + } else { Error = (Twine("malformed regex in line ") + Twine(LineNo) + ": '" + - SplitLine.second + "': " + REError).str(); + SplitLine.second + "': " + toString(RegexpOrError.takeError())) + .str(); return false; } @@ -149,7 +151,11 @@ for (StringMap::const_iterator II = I->second.begin(), IE = I->second.end(); II != IE; ++II) { - Entries[I->getKey()][II->getKey()].RegEx.reset(new Regex(II->getValue())); + // The regexes were validated above, when they were added to the + // Regexps map. + Entries[I->getKey()][II->getKey()].RegEx = llvm::make_unique<Regex>( + ExitOnError("combined validated regexes were somehow invalid")( + Regex::compile(II->getValue()))); } } Regexps.clear(); Index: lib/Support/regerror.c =================================================================== --- lib/Support/regerror.c +++ lib/Support/regerror.c @@ -49,8 +49,6 @@ #define snprintf _snprintf #endif -static const char *regatoi(const llvm_regex_t *, char *, int); - static struct rerr { int code; const char *name; @@ -85,29 +83,14 @@ { struct rerr *r; size_t len; - int target = errcode &~ REG_ITOA; + int target = errcode; const char *s; - char convbuf[50]; - if (errcode == REG_ATOI) - s = regatoi(preg, convbuf, sizeof convbuf); - else { - for (r = rerrs; r->code != 0; r++) - if (r->code == target) - break; - - if (errcode&REG_ITOA) { - if (r->code != 0) { - assert(strlen(r->name) < sizeof(convbuf)); - (void) llvm_strlcpy(convbuf, r->name, sizeof convbuf); - } else - (void)snprintf(convbuf, sizeof convbuf, - "REG_0x%x", target); - s = convbuf; - } else - s = r->explain; - } + for (r = rerrs; r->code != 0; r++) + if (r->code == target) + break; + s = r->explain; len = strlen(s) + 1; if (errbuf_size > 0) { llvm_strlcpy(errbuf, s, errbuf_size); @@ -115,21 +98,3 @@ return(len); } - -/* - - regatoi - internal 
routine to implement REG_ATOI - */ -static const char * -regatoi(const llvm_regex_t *preg, char *localbuf, int localbufsize) -{ - struct rerr *r; - - for (r = rerrs; r->code != 0; r++) - if (strcmp(r->name, preg->re_endp) == 0) - break; - if (r->code == 0) - return("0"); - - (void)snprintf(localbuf, localbufsize, "%d", r->code); - return(localbuf); -} Index: lib/Support/regex_impl.h =================================================================== --- lib/Support/regex_impl.h +++ lib/Support/regex_impl.h @@ -79,8 +79,6 @@ #define REG_EMPTY 14 #define REG_ASSERT 15 #define REG_INVARG 16 -#define REG_ATOI 255 /* convert name to number (!) */ -#define REG_ITOA 0400 /* convert number to name (!) */ /* llvm_regexec() flags */ #define REG_NOTBOL 00001 Index: lib/Target/AArch64/Utils/AArch64BaseInfo.cpp =================================================================== --- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -83,11 +83,14 @@ uint32_t AArch64SysReg::parseGenericRegister(StringRef Name) { // Try to parse an S____ register name - Regex GenericRegPattern("^S([0-3])_([0-7])_C([0-9]|1[0-5])_C([0-9]|1[0-5])_([0-7])$"); + auto GenericRegPattern = Regex::compile( + "^S([0-3])_([0-7])_C([0-9]|1[0-5])_C([0-9]|1[0-5])_([0-7])$"); std::string UpperName = Name.upper(); SmallVector Ops; - if (!GenericRegPattern.match(UpperName, &Ops)) + // Note that if GenericRegPattern failed to compile, this does not handle + // that error and will crash with asserts enabled. 
+ if (GenericRegPattern && !GenericRegPattern->match(UpperName, &Ops)) return -1; uint32_t Op0 = 0, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0; Index: lib/Target/ARM/ARMScheduleR52.td =================================================================== --- lib/Target/ARM/ARMScheduleR52.td +++ lib/Target/ARM/ARMScheduleR52.td @@ -314,7 +314,7 @@ def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi", "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi", - "t2AD(|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>; + "t2ADD?S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>; def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr", Index: lib/Transforms/Utils/SymbolRewriter.cpp =================================================================== --- lib/Transforms/Utils/SymbolRewriter.cpp +++ lib/Transforms/Utils/SymbolRewriter.cpp @@ -155,11 +155,16 @@ performOnModule(Module &M) { bool Changed = false; for (auto &C : (M.*Iterator)()) { + auto RegexOrError = Regex::compile(Pattern); + if (!RegexOrError) { + report_fatal_error("unable to transform " + C.getName() + " in " + + M.getModuleIdentifier() + ": " + + toString(RegexOrError.takeError())); + } std::string Error; - - std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error); + std::string Name = RegexOrError->sub(Transform, C.getName(), &Error); if (!Error.empty()) - report_fatal_error("unable to transforn " + C.getName() + " in " + + report_fatal_error("unable to transform " + C.getName() + " in " + M.getModuleIdentifier() + ": " + Error); if (C.getName() == Name) @@ -330,11 +335,11 @@ KeyValue = Key->getValue(KeyStorage); if (KeyValue.equals("source")) { - std::string Error; - Source = Value->getValue(ValueStorage); - if (!Regex(Source).isValid(Error)) { - YS.printError(Field.getKey(), "invalid regex: " + 
Error); + auto RegexOrError = Regex::compile(Source); + if (!RegexOrError) { + YS.printError(Field.getKey(), + "invalid regex: " + toString(RegexOrError.takeError())); return false; } } else if (KeyValue.equals("target")) { @@ -399,11 +404,11 @@ KeyValue = Key->getValue(KeyStorage); if (KeyValue.equals("source")) { - std::string Error; - Source = Value->getValue(ValueStorage); - if (!Regex(Source).isValid(Error)) { - YS.printError(Field.getKey(), "invalid regex: " + Error); + auto RegexOrError = Regex::compile(Source); + if (!RegexOrError) { + YS.printError(Field.getKey(), + "invalid regex: " + toString(RegexOrError.takeError())); return false; } } else if (KeyValue.equals("target")) { @@ -462,11 +467,11 @@ KeyValue = Key->getValue(KeyStorage); if (KeyValue.equals("source")) { - std::string Error; - Source = Value->getValue(ValueStorage); - if (!Regex(Source).isValid(Error)) { - YS.printError(Field.getKey(), "invalid regex: " + Error); + auto RegexOrError = Regex::compile(Source); + if (!RegexOrError) { + YS.printError(Field.getKey(), + "invalid regex: " + toString(RegexOrError.takeError())); return false; } } else if (KeyValue.equals("target")) { Index: tools/llvm-cov/CodeCoverage.cpp =================================================================== --- tools/llvm-cov/CodeCoverage.cpp +++ tools/llvm-cov/CodeCoverage.cpp @@ -604,9 +604,15 @@ auto NameFilterer = new CoverageFilters; for (const auto &Name : NameFilters) NameFilterer->push_back(llvm::make_unique(Name)); - for (const auto &Regex : NameRegexFilters) - NameFilterer->push_back( - llvm::make_unique(Regex)); + for (const auto &Regex : NameRegexFilters) { + if (auto RegexOrError = llvm::Regex::compile(Regex)) { + NameFilterer->push_back(llvm::make_unique( + std::move(*RegexOrError))); + } else { + error("Invalid regex.", toString(RegexOrError.takeError())); + return 1; + } + } Filters.push_back(std::unique_ptr(NameFilterer)); } if (RegionCoverageLtFilter.getNumOccurrences() || Index: 
tools/llvm-cov/CoverageFilters.h =================================================================== --- tools/llvm-cov/CoverageFilters.h +++ tools/llvm-cov/CoverageFilters.h @@ -15,6 +15,7 @@ #define LLVM_COV_COVERAGEFILTERS_H #include "llvm/ProfileData/Coverage/CoverageMapping.h" +#include "llvm/Support/Regex.h" #include #include @@ -43,10 +44,10 @@ /// \brief Matches functions whose name matches a certain regular expression. class NameRegexCoverageFilter : public CoverageFilter { - StringRef Regex; + llvm::Regex RE; public: - NameRegexCoverageFilter(StringRef Regex) : Regex(Regex) {} + NameRegexCoverageFilter(llvm::Regex RE) : RE(std::move(RE)) {} bool matches(const coverage::FunctionRecord &Function) override; }; Index: tools/llvm-cov/CoverageFilters.cpp =================================================================== --- tools/llvm-cov/CoverageFilters.cpp +++ tools/llvm-cov/CoverageFilters.cpp @@ -24,7 +24,7 @@ bool NameRegexCoverageFilter::matches(const coverage::FunctionRecord &Function) { - return llvm::Regex(Regex).match(Function.Name); + return RE.match(Function.Name); } bool RegionCoverageFilter::matches(const coverage::FunctionRecord &Function) { Index: tools/llvm-extract/llvm-extract.cpp =================================================================== --- tools/llvm-extract/llvm-extract.cpp +++ tools/llvm-extract/llvm-extract.cpp @@ -136,15 +136,16 @@ // Extract aliases via regular expression matching. 
for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) { std::string Error; - Regex RegEx(ExtractRegExpAliases[i]); - if (!RegEx.isValid(Error)) { + auto RegEx = Regex::compile(ExtractRegExpAliases[i]); + if (!RegEx) { errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' " - "invalid regex: " << Error; + "invalid regex: " << toString(RegEx.takeError()); + return 1; } bool match = false; for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end(); GA != E; GA++) { - if (RegEx.match(GA->getName())) { + if (RegEx->match(GA->getName())) { GVs.insert(&*GA); match = true; } @@ -170,14 +171,15 @@ // Extract globals via regular expression matching. for (size_t i = 0, e = ExtractRegExpGlobals.size(); i != e; ++i) { std::string Error; - Regex RegEx(ExtractRegExpGlobals[i]); - if (!RegEx.isValid(Error)) { + auto RegEx = Regex::compile(ExtractRegExpGlobals[i]); + if (!RegEx) { errs() << argv[0] << ": '" << ExtractRegExpGlobals[i] << "' " - "invalid regex: " << Error; + "invalid regex: " << toString(RegEx.takeError()); + return 1; } bool match = false; for (auto &GV : M->globals()) { - if (RegEx.match(GV.getName())) { + if (RegEx->match(GV.getName())) { GVs.insert(&GV); match = true; } @@ -203,15 +205,16 @@ for (size_t i = 0, e = ExtractRegExpFuncs.size(); i != e; ++i) { std::string Error; StringRef RegExStr = ExtractRegExpFuncs[i]; - Regex RegEx(RegExStr); - if (!RegEx.isValid(Error)) { + auto RegEx = Regex::compile(RegExStr); + if (!RegEx) { errs() << argv[0] << ": '" << ExtractRegExpFuncs[i] << "' " - "invalid regex: " << Error; + "invalid regex: " << toString(RegEx.takeError()); + return 1; } bool match = false; for (Module::iterator F = M->begin(), E = M->end(); F != E; F++) { - if (RegEx.match(F->getName())) { + if (RegEx->match(F->getName())) { GVs.insert(&*F); match = true; } Index: tools/llvm-pdbdump/LinePrinter.h =================================================================== --- tools/llvm-pdbdump/LinePrinter.h +++ 
tools/llvm-pdbdump/LinePrinter.h @@ -24,7 +24,7 @@ friend class WithColor; public: - LinePrinter(int Indent, raw_ostream &Stream); + LinePrinter(int Indent, raw_ostream &Stream, Error* Err = nullptr); void Indent(); void Unindent(); @@ -39,10 +39,18 @@ private: template - void SetFilters(std::list &List, Iter Begin, Iter End) { + void SetFilters(std::list &List, Iter Begin, Iter End, Error *Err) { + ErrorAsOutParameter ErrAsOutParam(Err); List.clear(); - for (; Begin != End; ++Begin) - List.emplace_back(StringRef(*Begin)); + for (; Begin != End; ++Begin) { + if (auto RegexOrError = Regex::compile(*Begin)) { + List.emplace_back(std::move(*RegexOrError)); + } else if (Err) { + *Err = joinErrors(std::move(*Err), RegexOrError.takeError()); + } else { + consumeError(RegexOrError.takeError()); + } + } } raw_ostream &OS; Index: tools/llvm-pdbdump/LinePrinter.cpp =================================================================== --- tools/llvm-pdbdump/LinePrinter.cpp +++ tools/llvm-pdbdump/LinePrinter.cpp @@ -42,21 +42,21 @@ using namespace llvm; -LinePrinter::LinePrinter(int Indent, llvm::raw_ostream &Stream) +LinePrinter::LinePrinter(int Indent, llvm::raw_ostream &Stream, Error *Err) : OS(Stream), IndentSpaces(Indent), CurrentIndent(0) { SetFilters(ExcludeTypeFilters, opts::pretty::ExcludeTypes.begin(), - opts::pretty::ExcludeTypes.end()); + opts::pretty::ExcludeTypes.end(), Err); SetFilters(ExcludeSymbolFilters, opts::pretty::ExcludeSymbols.begin(), - opts::pretty::ExcludeSymbols.end()); + opts::pretty::ExcludeSymbols.end(), Err); SetFilters(ExcludeCompilandFilters, opts::pretty::ExcludeCompilands.begin(), - opts::pretty::ExcludeCompilands.end()); + opts::pretty::ExcludeCompilands.end(), Err); SetFilters(IncludeTypeFilters, opts::pretty::IncludeTypes.begin(), - opts::pretty::IncludeTypes.end()); + opts::pretty::IncludeTypes.end(), Err); SetFilters(IncludeSymbolFilters, opts::pretty::IncludeSymbols.begin(), - opts::pretty::IncludeSymbols.end()); + 
opts::pretty::IncludeSymbols.end(), Err); SetFilters(IncludeCompilandFilters, opts::pretty::IncludeCompilands.begin(), - opts::pretty::IncludeCompilands.end()); + opts::pretty::IncludeCompilands.end(), Err); } void LinePrinter::Indent() { CurrentIndent += IndentSpaces; } Index: tools/llvm-pdbdump/llvm-pdbdump.cpp =================================================================== --- tools/llvm-pdbdump/llvm-pdbdump.cpp +++ tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -456,7 +456,9 @@ if (opts::pretty::LoadAddress) Session->setLoadAddress(opts::pretty::LoadAddress); - LinePrinter Printer(2, outs()); + Error Err = Error::success(); + LinePrinter Printer(2, outs(), &Err); + ExitOnErr(std::move(Err)); auto GlobalScope(Session->getGlobalScope()); std::string FileName(GlobalScope->getSymbolsFileName()); @@ -581,7 +583,7 @@ cl::ParseCommandLineOptions(argv.size(), argv.data(), "LLVM PDB Dumper\n"); if (!opts::raw::DumpBlockRangeOpt.empty()) { - llvm::Regex R("^([0-9]+)(-([0-9]+))?$"); + auto R = ExitOnErr(llvm::Regex::compile("^([0-9]+)(-([0-9]+))?$")); llvm::SmallVector Matches; if (!R.match(opts::raw::DumpBlockRangeOpt, &Matches)) { errs() << "Argument '" << opts::raw::DumpBlockRangeOpt Index: tools/sancov/sancov.cc =================================================================== --- tools/sancov/sancov.cc +++ tools/sancov/sancov.cc @@ -132,8 +132,10 @@ static const uint32_t Bitness32 = 0xFFFFFF32; static const uint32_t Bitness64 = 0xFFFFFF64; -static Regex SancovFileRegex("(.*)\\.[0-9]+\\.sancov"); -static Regex SymcovFileRegex(".*\\.symcov"); +static Regex SancovFileRegex = ExitOnError("invalid regex literal")( + Regex::compile("(.*)\\.[0-9]+\\.sancov")); +static Regex SymcovFileRegex = + ExitOnError("invalid regex literal")(Regex::compile(".*\\.symcov")); // --------- MAIN DATASTRUCTURES ---------- Index: unittests/Support/RegexTest.cpp =================================================================== --- unittests/Support/RegexTest.cpp +++ 
unittests/Support/RegexTest.cpp @@ -16,21 +16,30 @@ namespace { class RegexTest : public ::testing::Test { +protected: + Regex makeRegex(StringRef Pattern) { + auto RegexOrError = Regex::compile(Pattern); + if (!RegexOrError) { + ADD_FAILURE() << toString(RegexOrError.takeError()); + return Regex(); + } + return std::move(*RegexOrError); + } }; TEST_F(RegexTest, Basics) { - Regex r1("^[0-9]+$"); + auto r1 = makeRegex("^[0-9]+$"); EXPECT_TRUE(r1.match("916")); EXPECT_TRUE(r1.match("9")); EXPECT_FALSE(r1.match("9a")); SmallVector Matches; - Regex r2("[0-9]+"); + auto r2 = makeRegex("[0-9]+"); EXPECT_TRUE(r2.match("aa216b", &Matches)); EXPECT_EQ(1u, Matches.size()); EXPECT_EQ("216", Matches[0].str()); - Regex r3("[0-9]+([a-f])?:([0-9]+)"); + auto r3 = makeRegex("[0-9]+([a-f])?:([0-9]+)"); EXPECT_TRUE(r3.match("9a:513b", &Matches)); EXPECT_EQ(3u, Matches.size()); EXPECT_EQ("9a:513", Matches[0].str()); @@ -43,7 +52,7 @@ EXPECT_EQ("", Matches[1].str()); EXPECT_EQ("513", Matches[2].str()); - Regex r4("a[^b]+b"); + auto r4 = makeRegex("a[^b]+b"); std::string String="axxb"; String[2] = '\0'; EXPECT_FALSE(r4.match("abb")); @@ -54,7 +63,7 @@ std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)"; String="YX99a:513b"; NulPattern[7] = '\0'; - Regex r5(NulPattern); + auto r5 = makeRegex(NulPattern); EXPECT_FALSE(r5.match(String)); EXPECT_FALSE(r5.match("X9")); String[3]='\0'; @@ -62,19 +71,19 @@ } TEST_F(RegexTest, Backreferences) { - Regex r1("([a-z]+)_\\1"); + auto r1 = makeRegex("([a-z]+)_\\1"); SmallVector Matches; EXPECT_TRUE(r1.match("abc_abc", &Matches)); EXPECT_EQ(2u, Matches.size()); EXPECT_FALSE(r1.match("abc_ab", &Matches)); - Regex r2("a([0-9])b\\1c\\1"); + auto r2 = makeRegex("a([0-9])b\\1c\\1"); EXPECT_TRUE(r2.match("a4b4c4", &Matches)); EXPECT_EQ(2u, Matches.size()); EXPECT_EQ("4", Matches[1].str()); EXPECT_FALSE(r2.match("a2b2c3")); - Regex r3("a([0-9])([a-z])b\\1\\2"); + auto r3 = makeRegex("a([0-9])([a-z])b\\1\\2"); EXPECT_TRUE(r3.match("a6zb6z", &Matches)); 
EXPECT_EQ(3u, Matches.size()); EXPECT_EQ("6", Matches[1].str()); @@ -86,29 +95,29 @@ TEST_F(RegexTest, Substitution) { std::string Error; - EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber")); + EXPECT_EQ("aNUMber", makeRegex("[0-9]+").sub("NUM", "a1234ber")); // Standard Escapes - EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error)); + EXPECT_EQ("a\\ber", makeRegex("[0-9]+").sub("\\\\", "a1234ber", &Error)); EXPECT_EQ("", Error); - EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error)); + EXPECT_EQ("a\nber", makeRegex("[0-9]+").sub("\\n", "a1234ber", &Error)); EXPECT_EQ("", Error); - EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error)); + EXPECT_EQ("a\tber", makeRegex("[0-9]+").sub("\\t", "a1234ber", &Error)); EXPECT_EQ("", Error); - EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error)); + EXPECT_EQ("ajber", makeRegex("[0-9]+").sub("\\j", "a1234ber", &Error)); EXPECT_EQ("", Error); - EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error)); + EXPECT_EQ("aber", makeRegex("[0-9]+").sub("\\", "a1234ber", &Error)); EXPECT_EQ(Error, "replacement string contained trailing backslash"); // Backreferences - EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error)); + EXPECT_EQ("aa1234bber", makeRegex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error)); EXPECT_EQ("", Error); - EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error)); + EXPECT_EQ("a1234ber", makeRegex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error)); EXPECT_EQ("", Error); - EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error)); + EXPECT_EQ("aber", makeRegex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error)); EXPECT_EQ(Error, "invalid backreference string '100'"); } @@ -133,22 +142,21 @@ } TEST_F(RegexTest, IsValid) { - std::string Error; - EXPECT_FALSE(Regex("(foo").isValid(Error)); - EXPECT_EQ("parentheses not balanced", Error); - EXPECT_FALSE(Regex("a[b-").isValid(Error)); - 
EXPECT_EQ("invalid character range", Error); + auto RE = Regex::compile("(foo"); + EXPECT_EQ("parentheses not balanced", toString(RE.takeError())); + RE = Regex::compile("a[b-"); + EXPECT_EQ("invalid character range", toString(RE.takeError())); } TEST_F(RegexTest, MoveConstruct) { - Regex r1("^[0-9]+$"); + auto r1 = makeRegex("^[0-9]+$"); Regex r2(std::move(r1)); EXPECT_TRUE(r2.match("916")); } TEST_F(RegexTest, MoveAssign) { - Regex r1("^[0-9]+$"); - Regex r2("abc"); + auto r1 = makeRegex("^[0-9]+$"); + auto r2 = makeRegex("abc"); r2 = std::move(r1); EXPECT_TRUE(r2.match("916")); std::string Error; @@ -160,7 +168,7 @@ Regex r1; EXPECT_FALSE(r1.isValid(Error)); EXPECT_EQ("invalid regular expression", Error); - r1 = Regex("abc"); + r1 = makeRegex("abc"); EXPECT_TRUE(r1.isValid(Error)); } Index: utils/FileCheck/FileCheck.cpp =================================================================== --- utils/FileCheck/FileCheck.cpp +++ utils/FileCheck/FileCheck.cpp @@ -336,16 +336,15 @@ } bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { - Regex R(RS); - std::string Error; - if (!R.isValid(Error)) { + auto R = Regex::compile(RS); + if (!R) { SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, - "invalid regex: " + Error); + "invalid regex: " + toString(R.takeError())); return true; } RegExStr += RS.str(); - CurParen += R.getNumMatches(); + CurParen += R->getNumMatches(); return false; } @@ -436,7 +435,10 @@ } SmallVector MatchInfo; - if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) + auto R = Regex::compile(RegExToMatch, Regex::Newline); + // Note that if the regex is not valid, the following check does not handle + // the error, so this will crash with asserts enabled. + if (!R || !R->match(Buffer, &MatchInfo)) return StringRef::npos; // Successful regex match. @@ -1209,8 +1211,10 @@ // A check prefix must contain only alphanumeric, hyphens and underscores. 
static bool ValidateCheckPrefix(StringRef CheckPrefix) { - Regex Validator("^[a-zA-Z0-9_-]*$"); - return Validator.match(CheckPrefix); + auto Validator = Regex::compile("^[a-zA-Z0-9_-]*$"); + // Note that this does not handle errors returned by Regex::compile. If the + // regex string literal is invalid, this will crash when asserts are enabled. + return Validator && Validator->match(CheckPrefix); } static bool ValidateCheckPrefixes() { Index: utils/TableGen/CodeGenSchedule.cpp =================================================================== --- utils/TableGen/CodeGenSchedule.cpp +++ utils/TableGen/CodeGenSchedule.cpp @@ -76,7 +76,13 @@ pat.insert(0, "^("); pat.insert(pat.end(), ')'); } - RegexList.push_back(Regex(pat)); + if (auto RegexOrError = Regex::compile(pat)) { + RegexList.push_back(std::move(*RegexOrError)); + } else { + PrintFatalError(Loc, + "instregex got invalid pattern string: " + + toString(RegexOrError.takeError()) + ": " + pat); + } } for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) { for (auto &R : RegexList) {