Index: lldb/include/lldb/Interpreter/OptionValueRegex.h =================================================================== --- lldb/include/lldb/Interpreter/OptionValueRegex.h +++ lldb/include/lldb/Interpreter/OptionValueRegex.h @@ -36,7 +36,7 @@ VarSetOperationType = eVarSetOperationAssign) = delete; bool Clear() override { - m_regex.Clear(); + m_regex = RegularExpression(); m_value_was_set = false; return true; } @@ -52,7 +52,7 @@ if (value && value[0]) m_regex.Compile(llvm::StringRef(value)); else - m_regex.Clear(); + m_regex = RegularExpression(); } bool IsValid() const { return m_regex.IsValid(); } Index: lldb/include/lldb/Utility/RegularExpression.h =================================================================== --- lldb/include/lldb/Utility/RegularExpression.h +++ lldb/include/lldb/Utility/RegularExpression.h @@ -9,98 +9,22 @@ #ifndef liblldb_RegularExpression_h_ #define liblldb_RegularExpression_h_ -#ifdef _WIN32 -#include "../lib/Support/regex_impl.h" - -typedef llvm_regmatch_t regmatch_t; -typedef llvm_regex_t regex_t; - -inline int regcomp(llvm_regex_t *a, const char *b, int c) { - return llvm_regcomp(a, b, c); -} - -inline size_t regerror(int a, const llvm_regex_t *b, char *c, size_t d) { - return llvm_regerror(a, b, c, d); -} - -inline int regexec(const llvm_regex_t *a, const char *b, size_t c, - llvm_regmatch_t d[], int e) { - return llvm_regexec(a, b, c, d, e); -} - -inline void regfree(llvm_regex_t *a) { llvm_regfree(a); } -#else -#ifdef __ANDROID__ -#include -#endif -#include -#endif - -#include -#include - -#include -#include - -namespace llvm { -class StringRef; -} // namespace llvm +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Regex.h" namespace lldb_private { -/// \class RegularExpression RegularExpression.h -/// "lldb/Utility/RegularExpression.h" -/// A C++ wrapper class for regex. 
-/// -/// This regular expression class wraps the posix regex functions \c -/// regcomp(), \c regerror(), \c regexec(), and \c regfree() from the header -/// file in \c /usr/include/regex\.h. -class RegularExpression { +class RegularExpression : public llvm::Regex { public: - class Match { - public: - Match(uint32_t max_matches) : m_matches() { - if (max_matches > 0) - m_matches.resize(max_matches + 1); - } - - void Clear() { - const size_t num_matches = m_matches.size(); - regmatch_t invalid_match = {-1, -1}; - for (size_t i = 0; i < num_matches; ++i) - m_matches[i] = invalid_match; - } - - size_t GetSize() const { return m_matches.size(); } - - regmatch_t *GetData() { - return (m_matches.empty() ? nullptr : m_matches.data()); - } - - bool GetMatchAtIndex(llvm::StringRef s, uint32_t idx, - std::string &match_str) const; - - bool GetMatchAtIndex(llvm::StringRef s, uint32_t idx, - llvm::StringRef &match_str) const; - - protected: - std::vector - m_matches; ///< Where parenthesized subexpressions results are stored - }; - /// Default constructor. /// /// The default constructor that initializes the object state such that it /// contains no compiled regular expression. - RegularExpression(); + RegularExpression() = default; explicit RegularExpression(llvm::StringRef string); - - /// Destructor. - /// - /// Any previously compiled regular expression contained in this object will - /// be freed. - ~RegularExpression(); + ~RegularExpression() = default; RegularExpression(const RegularExpression &rhs); @@ -122,7 +46,6 @@ /// \b true if the regular expression compiles successfully, /// \b false otherwise. bool Compile(llvm::StringRef string); - bool Compile(const char *) = delete; /// Executes a regular expression. /// @@ -143,16 +66,8 @@ /// \return /// \b true if \a string matches the compiled regular /// expression, \b false otherwise. 
- bool Execute(llvm::StringRef string, Match *match = nullptr) const; - bool Execute(const char *, Match * = nullptr) = delete; - - size_t GetErrorAsCString(char *err_str, size_t err_str_max_len) const; - - /// Free the compiled regular expression. - /// - /// If this object contains a valid compiled regular expression, this - /// function will free any resources it was consuming. - void Free(); + bool Execute(llvm::StringRef string, + llvm::SmallVectorImpl *matches = nullptr) const; /// Access the regular expression text. /// @@ -169,25 +84,18 @@ /// Test if this object contains a valid regular expression. /// /// \return - /// \b true if the regular expression compiled and is ready - /// for execution, \b false otherwise. + /// \b true if the regular expression compiled and is ready for + /// execution, \b false otherwise. bool IsValid() const; - void Clear() { - Free(); - m_re.clear(); - m_comp_err = 1; - } - - int GetErrorCode() const { return m_comp_err; } - - bool operator<(const RegularExpression &rhs) const; + /// Return an error if the regular expression failed to compile. + llvm::Error GetError() const; private: - // Member variables - std::string m_re; ///< A copy of the original regular expression text - int m_comp_err; ///< Status code for the regular expression compilation - regex_t m_preg; ///< The compiled regular expression + /// A copy of the original regular expression text. + std::string m_regex_text; + /// The compiled regular expression. 
+ mutable llvm::Regex m_regex; }; } // namespace lldb_private Index: lldb/source/Commands/CommandObjectBreakpoint.cpp =================================================================== --- lldb/source/Commands/CommandObjectBreakpoint.cpp +++ lldb/source/Commands/CommandObjectBreakpoint.cpp @@ -682,12 +682,10 @@ // name { RegularExpression regexp(m_options.m_func_regexp); - if (!regexp.IsValid()) { - char err_str[1024]; - regexp.GetErrorAsCString(err_str, sizeof(err_str)); + if (llvm::Error err = regexp.GetError()) { result.AppendErrorWithFormat( "Function name regular expression could not be compiled: \"%s\"", - err_str); + llvm::toString(std::move(err)).c_str()); result.SetStatus(eReturnStatusFailed); return false; } @@ -718,12 +716,10 @@ } RegularExpression regexp(m_options.m_source_text_regexp); - if (!regexp.IsValid()) { - char err_str[1024]; - regexp.GetErrorAsCString(err_str, sizeof(err_str)); + if (llvm::Error err = regexp.GetError()) { result.AppendErrorWithFormat( "Source text regular expression could not be compiled: \"%s\"", - err_str); + llvm::toString(std::move(err)).c_str()); result.SetStatus(eReturnStatusFailed); return false; } Index: lldb/source/Commands/CommandObjectFrame.cpp =================================================================== --- lldb/source/Commands/CommandObjectFrame.cpp +++ lldb/source/Commands/CommandObjectFrame.cpp @@ -1,4 +1,4 @@ -//===-- CommandObjectFrame.cpp ----------------------------------*- C++ -*-===// +//===-- CommandObjectFrame.cpp ----------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -573,9 +573,9 @@ entry.c_str()); } } else { - char regex_error[1024]; - if (regex.GetErrorAsCString(regex_error, sizeof(regex_error))) - result.GetErrorStream().Printf("error: %s\n", regex_error); + if (llvm::Error err = regex.GetError()) + result.GetErrorStream().Printf( + "error: %s\n", llvm::toString(std::move(err)).c_str()); else result.GetErrorStream().Printf( "error: unknown regex error when compiling '%s'\n", Index: lldb/source/Core/Disassembler.cpp =================================================================== --- lldb/source/Core/Disassembler.cpp +++ lldb/source/Core/Disassembler.cpp @@ -355,12 +355,9 @@ const char *function_name = sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments) .GetCString(); - if (function_name) { - RegularExpression::Match regex_match(1); - if (avoid_regex->Execute(function_name, &regex_match)) { - // skip this source line - return true; - } + if (function_name && avoid_regex->Execute(function_name)) { + // skip this source line + return true; } } // don't skip this source line @@ -793,10 +790,9 @@ std::string value; static RegularExpression g_reg_exp( llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$")); - RegularExpression::Match regex_match(1); - bool reg_exp_success = g_reg_exp.Execute(line, &regex_match); - if (reg_exp_success) - regex_match.GetMatchAtIndex(line.c_str(), 1, value); + llvm::SmallVector matches; + if (g_reg_exp.Execute(line, &matches)) + value = matches[1].str(); else value = line; @@ -856,14 +852,15 @@ if (!line.empty()) { static RegularExpression g_reg_exp(llvm::StringRef( "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$")); - RegularExpression::Match regex_match(2); - bool reg_exp_success = g_reg_exp.Execute(line, &regex_match); + llvm::SmallVector matches; + + bool reg_exp_success = g_reg_exp.Execute(line, &matches); std::string key; std::string value; if (reg_exp_success) { - regex_match.GetMatchAtIndex(line.c_str(), 1, key); - regex_match.GetMatchAtIndex(line.c_str(), 2, value); + key = 
matches[1].str(); + value = matches[2].str(); } else { out_stream->Printf("Instruction::ReadDictionary: Failure executing " "regular expression.\n"); Index: lldb/source/Host/common/Socket.cpp =================================================================== --- lldb/source/Host/common/Socket.cpp +++ lldb/source/Host/common/Socket.cpp @@ -282,27 +282,25 @@ int32_t &port, Status *error_ptr) { static RegularExpression g_regex( llvm::StringRef("([^:]+|\\[[0-9a-fA-F:]+.*\\]):([0-9]+)")); - RegularExpression::Match regex_match(2); - if (g_regex.Execute(host_and_port, &regex_match)) { - if (regex_match.GetMatchAtIndex(host_and_port, 1, host_str) && - regex_match.GetMatchAtIndex(host_and_port, 2, port_str)) { - // IPv6 addresses are wrapped in [] when specified with ports - if (host_str.front() == '[' && host_str.back() == ']') - host_str = host_str.substr(1, host_str.size() - 2); - bool ok = false; - port = StringConvert::ToUInt32(port_str.c_str(), UINT32_MAX, 10, &ok); - if (ok && port <= UINT16_MAX) { - if (error_ptr) - error_ptr->Clear(); - return true; - } - // port is too large + llvm::SmallVector matches; + if (g_regex.Execute(host_and_port, &matches)) { + host_str = matches[1].str(); + port_str = matches[2].str(); + // IPv6 addresses are wrapped in [] when specified with ports + if (host_str.front() == '[' && host_str.back() == ']') + host_str = host_str.substr(1, host_str.size() - 2); + bool ok = false; + port = StringConvert::ToUInt32(port_str.c_str(), UINT32_MAX, 10, &ok); + if (ok && port <= UINT16_MAX) { if (error_ptr) - error_ptr->SetErrorStringWithFormat( - "invalid host:port specification: '%s'", - host_and_port.str().c_str()); - return false; + error_ptr->Clear(); + return true; } + // port is too large + if (error_ptr) + error_ptr->SetErrorStringWithFormat( + "invalid host:port specification: '%s'", host_and_port.str().c_str()); + return false; } // If this was unsuccessful, then check if it's simply a signed 32-bit Index: 
lldb/source/Interpreter/CommandObjectRegexCommand.cpp =================================================================== --- lldb/source/Interpreter/CommandObjectRegexCommand.cpp +++ lldb/source/Interpreter/CommandObjectRegexCommand.cpp @@ -30,15 +30,14 @@ CommandReturnObject &result) { EntryCollection::const_iterator pos, end = m_entries.end(); for (pos = m_entries.begin(); pos != end; ++pos) { - RegularExpression::Match regex_match(m_max_matches); - - if (pos->regex.Execute(command, &regex_match)) { + llvm::SmallVector matches; + if (pos->regex.Execute(command, &matches)) { std::string new_command(pos->command); - std::string match_str; char percent_var[8]; size_t idx, percent_var_idx; for (uint32_t match_idx = 1; match_idx <= m_max_matches; ++match_idx) { - if (regex_match.GetMatchAtIndex(command, match_idx, match_str)) { + if (match_idx < matches.size()) { + const std::string match_str = matches[match_idx].str(); const int percent_var_len = ::snprintf(percent_var, sizeof(percent_var), "%%%u", match_idx); for (idx = 0; (percent_var_idx = new_command.find( Index: lldb/source/Interpreter/OptionArgParser.cpp =================================================================== --- lldb/source/Interpreter/OptionArgParser.cpp +++ lldb/source/Interpreter/OptionArgParser.cpp @@ -211,29 +211,21 @@ // pointer types. 
static RegularExpression g_symbol_plus_offset_regex( "^(.*)([-\\+])[[:space:]]*(0x[0-9A-Fa-f]+|[0-9]+)[[:space:]]*$"); - RegularExpression::Match regex_match(3); - if (g_symbol_plus_offset_regex.Execute(sref, &regex_match)) { + + llvm::SmallVector matches; + if (g_symbol_plus_offset_regex.Execute(sref, &matches)) { uint64_t offset = 0; - bool add = true; - std::string name; - std::string str; - if (regex_match.GetMatchAtIndex(s, 1, name)) { - if (regex_match.GetMatchAtIndex(s, 2, str)) { - add = str[0] == '+'; - - if (regex_match.GetMatchAtIndex(s, 3, str)) { - if (!llvm::StringRef(str).getAsInteger(0, offset)) { - Status error; - addr = ToAddress(exe_ctx, name.c_str(), LLDB_INVALID_ADDRESS, - &error); - if (addr != LLDB_INVALID_ADDRESS) { - if (add) - return addr + offset; - else - return addr - offset; - } - } - } + std::string name = matches[1].str(); + std::string sign = matches[2].str(); + std::string str_offset = matches[3].str(); + if (!llvm::StringRef(str_offset).getAsInteger(0, offset)) { + Status error; + addr = ToAddress(exe_ctx, name.c_str(), LLDB_INVALID_ADDRESS, &error); + if (addr != LLDB_INVALID_ADDRESS) { + if (sign[0] == '+') + return addr + offset; + else + return addr - offset; } } } Index: lldb/source/Interpreter/OptionValueRegex.cpp =================================================================== --- lldb/source/Interpreter/OptionValueRegex.cpp +++ lldb/source/Interpreter/OptionValueRegex.cpp @@ -49,13 +49,10 @@ if (m_regex.Compile(value)) { m_value_was_set = true; NotifyValueChanged(); + } else if (llvm::Error err = m_regex.GetError()) { + error.SetErrorString(llvm::toString(std::move(err))); } else { - char regex_error[1024]; - if (m_regex.GetErrorAsCString(regex_error, sizeof(regex_error))) - error.SetErrorString(regex_error); - else - error.SetErrorStringWithFormat("regex error %u", - m_regex.GetErrorCode()); + error.SetErrorString("regex error"); } break; } Index: lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVMC.cpp 
=================================================================== --- lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVMC.cpp +++ lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVMC.cpp @@ -381,11 +381,10 @@ static RegularExpression s_regex( llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?")); - RegularExpression::Match matches(3); - + llvm::SmallVector matches; if (s_regex.Execute(out_string, &matches)) { - matches.GetMatchAtIndex(out_string.c_str(), 1, m_opcode_name); - matches.GetMatchAtIndex(out_string.c_str(), 2, m_mnemonics); + m_opcode_name = matches[1].str(); + m_mnemonics = matches[2].str(); } } } Index: lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp =================================================================== --- lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp +++ lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp @@ -443,27 +443,28 @@ // returned, `true` otherwise RegularExpression regex; - RegularExpression::Match regex_match(3); + llvm::SmallVector matches; bool matched = false; if (regex.Compile(llvm::StringRef("^([0-9]+),([0-9]+),([0-9]+)$")) && - regex.Execute(coord_s, &regex_match)) + regex.Execute(coord_s, &matches)) matched = true; else if (regex.Compile(llvm::StringRef("^([0-9]+),([0-9]+)$")) && - regex.Execute(coord_s, &regex_match)) + regex.Execute(coord_s, &matches)) matched = true; else if (regex.Compile(llvm::StringRef("^([0-9]+)$")) && - regex.Execute(coord_s, &regex_match)) + regex.Execute(coord_s, &matches)) matched = true; if (!matched) return false; - auto get_index = [&](int idx, uint32_t &i) -> bool { + auto get_index = [&](size_t idx, uint32_t &i) -> bool { std::string group; errno = 0; - if (regex_match.GetMatchAtIndex(coord_s.str().c_str(), idx + 1, group)) - return !llvm::StringRef(group).getAsInteger(10, i); + if (idx + 1 < matches.size()) { + return !llvm::StringRef(matches[idx + 
1]).getAsInteger(10, i); + } return true; }; @@ -4147,13 +4148,12 @@ // Matching a comma separated list of known words is fairly // straightforward with PCRE, but we're using ERE, so we end up with a // little ugliness... - RegularExpression::Match match(/* max_matches */ 5); RegularExpression match_type_list( llvm::StringRef("^([[:alpha:]]+)(,[[:alpha:]]+){0,4}$")); assert(match_type_list.IsValid()); - if (!match_type_list.Execute(option_val, &match)) { + if (!match_type_list.Execute(option_val)) { err_str.PutCString( "a comma-separated list of kernel types is required"); return false; Index: lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp =================================================================== --- lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp +++ lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp @@ -137,76 +137,67 @@ // ends at static RegularExpression g_bitfield_regex( llvm::StringRef("([A-Za-z_][A-Za-z0-9_]*)\\[([0-9]+):([0-9]+)\\]")); - RegularExpression::Match regex_match(3); - if (g_bitfield_regex.Execute(slice_str, &regex_match)) { - llvm::StringRef reg_name_str; - std::string msbit_str; - std::string lsbit_str; - if (regex_match.GetMatchAtIndex(slice_str, 1, reg_name_str) && - regex_match.GetMatchAtIndex(slice_str, 2, msbit_str) && - regex_match.GetMatchAtIndex(slice_str, 3, lsbit_str)) { - const uint32_t msbit = - StringConvert::ToUInt32(msbit_str.c_str(), UINT32_MAX); - const uint32_t lsbit = - StringConvert::ToUInt32(lsbit_str.c_str(), UINT32_MAX); - if (msbit != UINT32_MAX && lsbit != UINT32_MAX) { - if (msbit > lsbit) { - const uint32_t msbyte = msbit / 8; - const uint32_t lsbyte = lsbit / 8; - - ConstString containing_reg_name(reg_name_str); - - const RegisterInfo *containing_reg_info = - GetRegisterInfo(containing_reg_name); - if (containing_reg_info) { - const uint32_t max_bit = containing_reg_info->byte_size * 8; - if (msbit < max_bit && lsbit < max_bit) { - m_invalidate_regs_map[containing_reg_info - 
->kinds[eRegisterKindLLDB]] - .push_back(i); - m_value_regs_map[i].push_back( - containing_reg_info->kinds[eRegisterKindLLDB]); - m_invalidate_regs_map[i].push_back( - containing_reg_info->kinds[eRegisterKindLLDB]); - - if (byte_order == eByteOrderLittle) { - success = true; - reg_info.byte_offset = - containing_reg_info->byte_offset + lsbyte; - } else if (byte_order == eByteOrderBig) { - success = true; - reg_info.byte_offset = - containing_reg_info->byte_offset + msbyte; - } else { - llvm_unreachable("Invalid byte order"); - } + llvm::SmallVector matches; + if (g_bitfield_regex.Execute(slice_str, &matches)) { + std::string reg_name_str = matches[1].str(); + std::string msbit_str = matches[2].str(); + std::string lsbit_str = matches[3].str(); + const uint32_t msbit = + StringConvert::ToUInt32(msbit_str.c_str(), UINT32_MAX); + const uint32_t lsbit = + StringConvert::ToUInt32(lsbit_str.c_str(), UINT32_MAX); + if (msbit != UINT32_MAX && lsbit != UINT32_MAX) { + if (msbit > lsbit) { + const uint32_t msbyte = msbit / 8; + const uint32_t lsbyte = lsbit / 8; + + ConstString containing_reg_name(reg_name_str); + + const RegisterInfo *containing_reg_info = + GetRegisterInfo(containing_reg_name); + if (containing_reg_info) { + const uint32_t max_bit = containing_reg_info->byte_size * 8; + if (msbit < max_bit && lsbit < max_bit) { + m_invalidate_regs_map[containing_reg_info + ->kinds[eRegisterKindLLDB]] + .push_back(i); + m_value_regs_map[i].push_back( + containing_reg_info->kinds[eRegisterKindLLDB]); + m_invalidate_regs_map[i].push_back( + containing_reg_info->kinds[eRegisterKindLLDB]); + + if (byte_order == eByteOrderLittle) { + success = true; + reg_info.byte_offset = + containing_reg_info->byte_offset + lsbyte; + } else if (byte_order == eByteOrderBig) { + success = true; + reg_info.byte_offset = + containing_reg_info->byte_offset + msbyte; } else { - if (msbit > max_bit) - printf("error: msbit (%u) must be less than the bitsize " - "of the register (%u)\n", - msbit, 
max_bit); - else - printf("error: lsbit (%u) must be less than the bitsize " - "of the register (%u)\n", - lsbit, max_bit); + llvm_unreachable("Invalid byte order"); } } else { - printf("error: invalid concrete register \"%s\"\n", - containing_reg_name.GetCString()); + if (msbit > max_bit) + printf("error: msbit (%u) must be less than the bitsize " + "of the register (%u)\n", + msbit, max_bit); + else + printf("error: lsbit (%u) must be less than the bitsize " + "of the register (%u)\n", + lsbit, max_bit); } } else { - printf("error: msbit (%u) must be greater than lsbit (%u)\n", - msbit, lsbit); + printf("error: invalid concrete register \"%s\"\n", + containing_reg_name.GetCString()); } } else { - printf("error: msbit (%u) and lsbit (%u) must be valid\n", msbit, - lsbit); + printf("error: msbit (%u) must be greater than lsbit (%u)\n", + msbit, lsbit); } } else { - // TODO: print error invalid slice string that doesn't follow the - // format - printf("error: failed to extract regex matches for parsing the " - "register bitfield regex\n"); + printf("error: msbit (%u) and lsbit (%u) must be valid\n", msbit, + lsbit); } } else { // TODO: print error invalid slice string that doesn't follow the Index: lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp =================================================================== --- lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp +++ lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp @@ -288,11 +288,8 @@ // Instantiate the regex so we can report any errors. 
auto regex = RegularExpression(op_arg); - if (!regex.IsValid()) { - char error_text[256]; - error_text[0] = '\0'; - regex.GetErrorAsCString(error_text, sizeof(error_text)); - error.SetErrorString(error_text); + if (llvm::Error err = regex.GetError()) { + error.SetErrorString(llvm::toString(std::move(err))); return FilterRuleSP(); } Index: lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp =================================================================== --- lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -540,19 +540,15 @@ } else if (strstr(producer_cstr, "clang")) { static RegularExpression g_clang_version_regex( llvm::StringRef("clang-([0-9]+)\\.([0-9]+)\\.([0-9]+)")); - RegularExpression::Match regex_match(3); + llvm::SmallVector matches; if (g_clang_version_regex.Execute(llvm::StringRef(producer_cstr), - &regex_match)) { - std::string str; - if (regex_match.GetMatchAtIndex(producer_cstr, 1, str)) - m_producer_version_major = - StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10); - if (regex_match.GetMatchAtIndex(producer_cstr, 2, str)) - m_producer_version_minor = - StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10); - if (regex_match.GetMatchAtIndex(producer_cstr, 3, str)) - m_producer_version_update = - StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10); + &matches)) { + m_producer_version_major = + StringConvert::ToUInt32(matches[1].str().c_str(), UINT32_MAX, 10); + m_producer_version_minor = + StringConvert::ToUInt32(matches[2].str().c_str(), UINT32_MAX, 10); + m_producer_version_update = + StringConvert::ToUInt32(matches[3].str().c_str(), UINT32_MAX, 10); } m_producer = eProducerClang; } else if (strstr(producer_cstr, "GNU")) Index: lldb/source/Symbol/ObjectFile.cpp =================================================================== --- lldb/source/Symbol/ObjectFile.cpp +++ lldb/source/Symbol/ObjectFile.cpp @@ -575,18 +575,15 @@ FileSpec &archive_file, ConstString &archive_object, bool 
must_exist) { + llvm::SmallVector matches; RegularExpression g_object_regex(llvm::StringRef("(.*)\\(([^\\)]+)\\)$")); - RegularExpression::Match regex_match(2); if (g_object_regex.Execute(llvm::StringRef::withNullAsEmpty(path_with_object), - &regex_match)) { - std::string path; - std::string obj; - if (regex_match.GetMatchAtIndex(path_with_object, 1, path) && - regex_match.GetMatchAtIndex(path_with_object, 2, obj)) { - archive_file.SetFile(path, FileSpec::Style::native); - archive_object.SetCString(obj.c_str()); - return !(must_exist && !FileSystem::Instance().Exists(archive_file)); - } + &matches)) { + std::string path = matches[1].str(); + std::string obj = matches[2].str(); + archive_file.SetFile(path, FileSpec::Style::native); + archive_object.SetCString(obj.c_str()); + return !(must_exist && !FileSystem::Instance().Exists(archive_file)); } return false; } Index: lldb/source/Symbol/Variable.cpp =================================================================== --- lldb/source/Symbol/Variable.cpp +++ lldb/source/Symbol/Variable.cpp @@ -390,21 +390,15 @@ default: { static RegularExpression g_regex( llvm::StringRef("^([A-Za-z_:][A-Za-z_0-9:]*)(.*)")); - RegularExpression::Match regex_match(1); - std::string variable_name; + llvm::SmallVector matches; variable_list.Clear(); - if (!g_regex.Execute(variable_expr_path, &regex_match)) { - error.SetErrorStringWithFormat( - "unable to extract a variable name from '%s'", - variable_expr_path.str().c_str()); - return error; - } - if (!regex_match.GetMatchAtIndex(variable_expr_path, 1, variable_name)) { + if (!g_regex.Execute(variable_expr_path, &matches)) { error.SetErrorStringWithFormat( "unable to extract a variable name from '%s'", variable_expr_path.str().c_str()); return error; } + std::string variable_name = matches[1].str(); if (!callback(baton, variable_name.c_str(), variable_list)) { error.SetErrorString("unknown error"); return error; Index: lldb/source/Target/ThreadPlanStepInRange.cpp 
=================================================================== --- lldb/source/Target/ThreadPlanStepInRange.cpp +++ lldb/source/Target/ThreadPlanStepInRange.cpp @@ -361,26 +361,17 @@ sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments) .GetCString(); if (frame_function_name) { - size_t num_matches = 0; - Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_STEP)); - if (log) - num_matches = 1; - - RegularExpression::Match regex_match(num_matches); - + llvm::SmallVector matches; bool return_value = - avoid_regexp_to_use->Execute(frame_function_name, &regex_match); + avoid_regexp_to_use->Execute(frame_function_name, &matches); if (return_value) { - if (log) { - std::string match; - regex_match.GetMatchAtIndex(frame_function_name, 0, match); - LLDB_LOGF(log, - "Stepping out of function \"%s\" because it matches " - "the avoid regexp \"%s\" - match substring: \"%s\".", - frame_function_name, - avoid_regexp_to_use->GetText().str().c_str(), - match.c_str()); - } + std::string match = matches[1].str(); + LLDB_LOGF(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_STEP), + "Stepping out of function \"%s\" because it matches " + "the avoid regexp \"%s\" - match substring: \"%s\".", + frame_function_name, + avoid_regexp_to_use->GetText().str().c_str(), + match.c_str()); } return return_value; } Index: lldb/source/Utility/RegularExpression.cpp =================================================================== --- lldb/source/Utility/RegularExpression.cpp +++ lldb/source/Utility/RegularExpression.cpp @@ -8,30 +8,11 @@ #include "lldb/Utility/RegularExpression.h" -#include "llvm/ADT/StringRef.h" - #include -// Enable enhanced mode if it is available. This allows for things like \d for -// digit, \s for space, and many more, but it isn't available everywhere. 
-#if defined(REG_ENHANCED) -#define DEFAULT_COMPILE_FLAGS (REG_ENHANCED | REG_EXTENDED) -#else -#define DEFAULT_COMPILE_FLAGS (REG_EXTENDED) -#endif - using namespace lldb_private; -RegularExpression::RegularExpression() : m_re(), m_comp_err(1), m_preg() { - memset(&m_preg, 0, sizeof(m_preg)); -} - -// Constructor that compiles "re" using "flags" and stores the resulting -// compiled regular expression into this object. -RegularExpression::RegularExpression(llvm::StringRef str) - : RegularExpression() { - Compile(str); -} +RegularExpression::RegularExpression(llvm::StringRef str) { Compile(str); } RegularExpression::RegularExpression(const RegularExpression &rhs) : RegularExpression() { @@ -45,114 +26,29 @@ return *this; } -// Destructor -// -// Any previously compiled regular expression contained in this object will be -// freed. -RegularExpression::~RegularExpression() { Free(); } - -// Compile a regular expression using the supplied regular expression text and -// flags. The compiled regular expression lives in this object so that it can -// be readily used for regular expression matches. Execute() can be called -// after the regular expression is compiled. Any previously compiled regular -// expression contained in this object will be freed. -// -// RETURNS -// True if the regular expression compiles successfully, false -// otherwise. bool RegularExpression::Compile(llvm::StringRef str) { - Free(); - - // regcomp() on darwin does not recognize "" as a valid regular expression, - // so we substitute it with an equivalent non-empty one. - m_re = str.empty() ? "()" : str; - m_comp_err = ::regcomp(&m_preg, m_re.c_str(), DEFAULT_COMPILE_FLAGS); - return m_comp_err == 0; + m_regex_text = str.empty() ? "()" : str; + m_regex = llvm::Regex(m_regex_text); + return IsValid(); } -// Execute a regular expression match using the compiled regular expression -// that is already in this object against the match string "s". 
If any parens -// are used for regular expression matches "match_count" should indicate the -// number of regmatch_t values that are present in "match_ptr". The regular -// expression will be executed using the "execute_flags". -bool RegularExpression::Execute(llvm::StringRef str, Match *match) const { - int err = 1; - if (m_comp_err == 0) { - // Argument to regexec must be null-terminated. - std::string reg_str = str; - if (match) { - err = ::regexec(&m_preg, reg_str.c_str(), match->GetSize(), - match->GetData(), 0); - } else { - err = ::regexec(&m_preg, reg_str.c_str(), 0, nullptr, 0); - } - } - - if (err != 0) { - // The regular expression didn't compile, so clear the matches - if (match) - match->Clear(); - return false; - } - return true; -} - -bool RegularExpression::Match::GetMatchAtIndex(llvm::StringRef s, uint32_t idx, - std::string &match_str) const { - llvm::StringRef match_str_ref; - if (GetMatchAtIndex(s, idx, match_str_ref)) { - match_str = match_str_ref.str(); - return true; - } - return false; -} - -bool RegularExpression::Match::GetMatchAtIndex( - llvm::StringRef s, uint32_t idx, llvm::StringRef &match_str) const { - if (idx < m_matches.size()) { - if (m_matches[idx].rm_eo == -1 && m_matches[idx].rm_so == -1) - return false; - - if (m_matches[idx].rm_eo == m_matches[idx].rm_so) { - // Matched the empty string... - match_str = llvm::StringRef(); - return true; - } else if (m_matches[idx].rm_eo > m_matches[idx].rm_so) { - match_str = s.substr(m_matches[idx].rm_so, - m_matches[idx].rm_eo - m_matches[idx].rm_so); - return true; - } - } - return false; +bool RegularExpression::Execute( + llvm::StringRef str, + llvm::SmallVectorImpl *matches) const { + return m_regex.match(str, matches); } -// Returns true if the regular expression compiled and is ready for execution. -bool RegularExpression::IsValid() const { return m_comp_err == 0; } - -// Returns the text that was used to compile the current regular expression. 
-llvm::StringRef RegularExpression::GetText() const { return m_re; } - -// Free any contained compiled regular expressions. -void RegularExpression::Free() { - if (m_comp_err == 0) { - m_re.clear(); - regfree(&m_preg); - // Set a compile error since we no longer have a valid regex - m_comp_err = 1; - } +bool RegularExpression::IsValid() const { + std::string discarded; + return m_regex.isValid(discarded); } -size_t RegularExpression::GetErrorAsCString(char *err_str, - size_t err_str_max_len) const { - if (m_comp_err == 0) { - if (err_str && err_str_max_len) - *err_str = '\0'; - return 0; - } - - return ::regerror(m_comp_err, &m_preg, err_str, err_str_max_len); -} +llvm::StringRef RegularExpression::GetText() const { return m_regex_text; } -bool RegularExpression::operator<(const RegularExpression &rhs) const { - return (m_re < rhs.m_re); +llvm::Error RegularExpression::GetError() const { + std::string error; + if (!m_regex.isValid(error)) + return llvm::make_error(llvm::inconvertibleErrorCode(), + error); + return llvm::Error::success(); } Index: lldb/unittests/Utility/CMakeLists.txt =================================================================== --- lldb/unittests/Utility/CMakeLists.txt +++ lldb/unittests/Utility/CMakeLists.txt @@ -21,8 +21,9 @@ RangeMapTest.cpp RangeTest.cpp RegisterValueTest.cpp - ReproducerTest.cpp + RegularExpressionTest.cpp ReproducerInstrumentationTest.cpp + ReproducerTest.cpp ScalarTest.cpp StateTest.cpp StatusTest.cpp Index: lldb/unittests/Utility/RegularExpressionTest.cpp =================================================================== --- /dev/null +++ lldb/unittests/Utility/RegularExpressionTest.cpp @@ -0,0 +1,63 @@ +//===-- RegularExpressionTest.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Utility/RegularExpression.h" +#include "llvm/ADT/SmallVector.h" +#include "gtest/gtest.h" + +using namespace lldb_private; +using namespace llvm; + +TEST(RegularExpression, Valid) { + RegularExpression r1("^[0-9]+$"); + cantFail(r1.GetError()); + EXPECT_TRUE(r1.IsValid()); + EXPECT_EQ("^[0-9]+$", r1.GetText()); + EXPECT_TRUE(r1.Execute("916")); +} + +TEST(RegularExpression, CopyAssignment) { + RegularExpression r1("^[0-9]+$"); + RegularExpression r2 = r1; + cantFail(r2.GetError()); + EXPECT_TRUE(r2.IsValid()); + EXPECT_EQ("^[0-9]+$", r2.GetText()); + EXPECT_TRUE(r2.Execute("916")); +} + +TEST(RegularExpression, Empty) { + RegularExpression r1(""); + cantFail(r1.GetError()); + EXPECT_TRUE(r1.IsValid()); + EXPECT_EQ("()", r1.GetText()); + EXPECT_TRUE(r1.Execute("916")); +} + +TEST(RegularExpression, Invalid) { + RegularExpression r1("a[b-"); + Error err = r1.GetError(); + EXPECT_TRUE(static_cast(err)); + consumeError(std::move(err)); + EXPECT_FALSE(r1.IsValid()); + EXPECT_EQ("a[b-", r1.GetText()); + EXPECT_FALSE(r1.Execute("ab")); +} + +TEST(RegularExpression, Match) { + RegularExpression r1("[0-9]+([a-f])?:([0-9]+)"); + cantFail(r1.GetError()); + EXPECT_TRUE(r1.IsValid()); + EXPECT_EQ("[0-9]+([a-f])?:([0-9]+)", r1.GetText()); + + SmallVector matches; + EXPECT_TRUE(r1.Execute("9a:513b", &matches)); + EXPECT_EQ(3u, matches.size()); + EXPECT_EQ("9a:513", matches[0].str()); + EXPECT_EQ("a", matches[1].str()); + EXPECT_EQ("513", matches[2].str()); +}