Index: source/Plugins/Language/CPlusPlus/CMakeLists.txt =================================================================== --- source/Plugins/Language/CPlusPlus/CMakeLists.txt +++ source/Plugins/Language/CPlusPlus/CMakeLists.txt @@ -1,6 +1,7 @@ add_lldb_library(lldbPluginCPlusPlusLanguage PLUGIN BlockPointer.cpp CPlusPlusLanguage.cpp + CPlusPlusNameParser.cpp CxxStringTypes.cpp LibCxx.cpp LibCxxAtomic.cpp Index: source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h =================================================================== --- source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -29,20 +29,13 @@ public: class MethodName { public: - enum Type { - eTypeInvalid, - eTypeUnknownMethod, - eTypeClassMethod, - eTypeInstanceMethod - }; - MethodName() : m_full(), m_basename(), m_context(), m_arguments(), m_qualifiers(), - m_type(eTypeInvalid), m_parsed(false), m_parse_error(false) {} + m_parsed(false), m_parse_error(false) {} MethodName(const ConstString &s) : m_full(s), m_basename(), m_context(), m_arguments(), m_qualifiers(), - m_type(eTypeInvalid), m_parsed(false), m_parse_error(false) {} + m_parsed(false), m_parse_error(false) {} void Clear(); @@ -51,13 +44,9 @@ Parse(); if (m_parse_error) return false; - if (m_type == eTypeInvalid) - return false; return (bool)m_full; } - Type GetType() const { return m_type; } - const ConstString &GetFullName() const { return m_full; } std::string GetScopeQualifiedName(); @@ -80,7 +69,6 @@ llvm::StringRef m_context; // Decl context: "lldb::SBTarget" llvm::StringRef m_arguments; // Arguments: "(unsigned int)" llvm::StringRef m_qualifiers; // Qualifiers: "const" - Type m_type; bool m_parsed; bool m_parse_error; }; @@ -121,7 +109,7 @@ // If the name is a lone C identifier (e.g. C) or a qualified C identifier // (e.g. A::B::C) it will return true, // and identifier will be the identifier (C and C respectively) and the - // context will be "" and "A::B::" respectively. 
+ // context will be "" and "A::B" respectively. // If the name fails the heuristic matching for a qualified or unqualified // C/C++ identifier, then it will return false // and identifier and context will be unchanged. Index: source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp =================================================================== --- source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -21,7 +21,6 @@ // Other libraries and framework includes #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Threading.h" // Project includes #include "lldb/Core/PluginManager.h" @@ -36,6 +35,7 @@ #include "lldb/Utility/RegularExpression.h" #include "BlockPointer.h" +#include "CPlusPlusNameParser.h" #include "CxxStringTypes.h" #include "LibCxx.h" #include "LibCxxAtomic.h" @@ -85,164 +85,24 @@ m_context = llvm::StringRef(); m_arguments = llvm::StringRef(); m_qualifiers = llvm::StringRef(); - m_type = eTypeInvalid; m_parsed = false; m_parse_error = false; } -bool ReverseFindMatchingChars(const llvm::StringRef &s, - const llvm::StringRef &left_right_chars, - size_t &left_pos, size_t &right_pos, - size_t pos = llvm::StringRef::npos) { - assert(left_right_chars.size() == 2); - left_pos = llvm::StringRef::npos; - const char left_char = left_right_chars[0]; - const char right_char = left_right_chars[1]; - pos = s.find_last_of(left_right_chars, pos); - if (pos == llvm::StringRef::npos || s[pos] == left_char) - return false; - right_pos = pos; - uint32_t depth = 1; - while (pos > 0 && depth > 0) { - pos = s.find_last_of(left_right_chars, pos); - if (pos == llvm::StringRef::npos) - return false; - if (s[pos] == left_char) { - if (--depth == 0) { - left_pos = pos; - return left_pos < right_pos; - } - } else if (s[pos] == right_char) { - ++depth; - } - } - return false; -} - -static bool IsValidBasename(const llvm::StringRef &basename) { - // Check that the basename matches with the following regular 
expression or is - // an operator name: - // "^~?([A-Za-z_][A-Za-z_0-9]*)(<.*>)?$" - // We are using a hand written implementation because it is significantly more - // efficient then - // using the general purpose regular expression library. - size_t idx = 0; - if (basename.size() > 0 && basename[0] == '~') - idx = 1; - - if (basename.size() <= idx) - return false; // Empty string or "~" - - if (!std::isalpha(basename[idx]) && basename[idx] != '_') - return false; // First charater (after removing the possible '~'') isn't in - // [A-Za-z_] - - // Read all characters matching [A-Za-z_0-9] - ++idx; - while (idx < basename.size()) { - if (!std::isalnum(basename[idx]) && basename[idx] != '_') - break; - ++idx; - } - - // We processed all characters. It is a vaild basename. - if (idx == basename.size()) - return true; - - // Check for basename with template arguments - // TODO: Improve the quality of the validation with validating the template - // arguments - if (basename[idx] == '<' && basename.back() == '>') - return true; - - // Check if the basename is a vaild C++ operator name - if (!basename.startswith("operator")) - return false; - - static RegularExpression g_operator_regex( - llvm::StringRef("^(operator)( " - "?)([A-Za-z_][A-Za-z_0-9]*|\\(\\)|" - "\\[\\]|[\\^<>=!\\/" - "*+-]+)(<.*>)?(\\[\\])?$")); - std::string basename_str(basename.str()); - return g_operator_regex.Execute(basename_str, nullptr); -} - void CPlusPlusLanguage::MethodName::Parse() { if (!m_parsed && m_full) { - // ConstString mangled; - // m_full.GetMangledCounterpart(mangled); - // printf ("\n parsing = '%s'\n", m_full.GetCString()); - // if (mangled) - // printf (" mangled = '%s'\n", mangled.GetCString()); - m_parse_error = false; - m_parsed = true; - llvm::StringRef full(m_full.GetCString()); - - size_t arg_start, arg_end; - llvm::StringRef parens("()", 2); - if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) { - m_arguments = full.substr(arg_start, arg_end - arg_start + 1); - 
if (arg_end + 1 < full.size()) - m_qualifiers = full.substr(arg_end + 1); - if (arg_start > 0) { - size_t basename_end = arg_start; - size_t context_start = 0; - size_t context_end = llvm::StringRef::npos; - if (basename_end > 0 && full[basename_end - 1] == '>') { - // TODO: handle template junk... - // Templated function - size_t template_start, template_end; - llvm::StringRef lt_gt("<>", 2); - if (ReverseFindMatchingChars(full, lt_gt, template_start, - template_end, basename_end)) { - // Check for templated functions that include return type like: - // 'void foo()' - context_start = full.rfind(' ', template_start); - if (context_start == llvm::StringRef::npos) - context_start = 0; - else - ++context_start; - - context_end = full.rfind(':', template_start); - if (context_end == llvm::StringRef::npos || - context_end < context_start) - context_end = context_start; - } else { - context_end = full.rfind(':', basename_end); - } - } else if (context_end == llvm::StringRef::npos) { - context_end = full.rfind(':', basename_end); - } - - if (context_end == llvm::StringRef::npos) - m_basename = full.substr(0, basename_end); - else { - if (context_start < context_end) - m_context = - full.substr(context_start, context_end - 1 - context_start); - const size_t basename_begin = context_end + 1; - m_basename = - full.substr(basename_begin, basename_end - basename_begin); - } - m_type = eTypeUnknownMethod; - } else { - m_parse_error = true; - return; - } - - if (!IsValidBasename(m_basename)) { - // The C++ basename doesn't match our regular expressions so this can't - // be a valid C++ method, clear everything out and indicate an error - m_context = llvm::StringRef(); - m_basename = llvm::StringRef(); - m_arguments = llvm::StringRef(); - m_qualifiers = llvm::StringRef(); - m_parse_error = true; - } + CPlusPlusNameParser parser(m_full.GetStringRef()); + auto function = parser.ParseAsFunctionDefinition(); + if (function.hasValue()) { + m_basename = 
function.getValue().m_name.m_basename; + m_context = function.getValue().m_name.m_context; + m_arguments = function.getValue().m_arguments; + m_qualifiers = function.getValue().m_qualifiers; + m_parse_error = false; } else { m_parse_error = true; } + m_parsed = true; } } @@ -273,14 +133,13 @@ std::string CPlusPlusLanguage::MethodName::GetScopeQualifiedName() { if (!m_parsed) Parse(); - if (m_basename.empty() || m_context.empty()) - return std::string(); + if (m_context.empty()) + return m_basename; std::string res; res += m_context; res += "::"; res += m_basename; - return res; } @@ -296,13 +155,11 @@ bool CPlusPlusLanguage::ExtractContextAndIdentifier( const char *name, llvm::StringRef &context, llvm::StringRef &identifier) { - static RegularExpression g_basename_regex(llvm::StringRef( - "^(([A-Za-z_][A-Za-z_0-9]*::)*)(~?[A-Za-z_~][A-Za-z_0-9]*)$")); - RegularExpression::Match match(4); - if (g_basename_regex.Execute(llvm::StringRef::withNullAsEmpty(name), - &match)) { - match.GetMatchAtIndex(name, 1, context); - match.GetMatchAtIndex(name, 3, identifier); + CPlusPlusNameParser parser(name); + auto full_name = parser.ParseAsFullName(); + if (full_name.hasValue()) { + identifier = full_name.getValue().m_basename; + context = full_name.getValue().m_context; return true; } return false; Index: source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h =================================================================== --- /dev/null +++ source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h @@ -0,0 +1,152 @@ +//===-- CPlusPlusNameParser.h -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef liblldb_CPlusPlusNameParser_h_ +#define liblldb_CPlusPlusNameParser_h_ + +// C Includes +// C++ Includes + +// Other libraries and framework includes +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +// Project includes +#include "lldb/Utility/ConstString.h" +#include "lldb/lldb-private.h" + +namespace lldb_private { + +// Helps to validate and obtain various parts of C++ definitions. +class CPlusPlusNameParser { +public: + CPlusPlusNameParser(llvm::StringRef text) : m_text(text) { ExtractTokens(); } + + struct ParsedName { + llvm::StringRef m_basename; + llvm::StringRef m_context; + }; + + struct ParsedFunction { + ParsedName m_name; + llvm::StringRef m_arguments; + llvm::StringRef m_qualifiers; + }; + + // Treats given text as a function definition and parses it. + // Function definition might or might not have a return type and this should + // change parsing result. + // Examples: + // main(int, char const*) + // T fun(int, bool) + // std::vector<int>::push_back(int) + // int& map<int, pair<short, int>>::operator[](short) const + // int (*get_function(const char *))() + llvm::Optional ParseAsFunctionDefinition(); + + // Treats given text as a potentially nested name of C++ entity (function, + // class, field) and parses it.
+ // Examples: + // main + // fun + // std::vector<int>::push_back + // map<int, pair<short, int>>::operator[] + // func<C>(int, C&)::nested_class::method + llvm::Optional ParseAsFullName(); + +private: + llvm::StringRef m_text; + llvm::SmallVector m_tokens; + int m_next_token_index = 0; + + struct Range { + int m_begin_index = 0; + int m_end_index = 0; + + Range() {} + + Range(int begin, int end) : m_begin_index(begin), m_end_index(end) {} + + int size() const { return m_end_index - m_begin_index; } + + bool empty() const { return size() == 0; } + }; + + struct ParsedNameRanges { + Range m_basename_range; + Range m_context_range; + }; + + // Bookmark automatically restores parsing position when destructed + // unless it's manually removed with Remove(). + class Bookmark { + public: + Bookmark(int &position) + : m_position(position), m_position_value(position) {} + + void Remove() { m_restore = false; } + + int GetSavedPosition() { return m_position_value; } + + ~Bookmark() { + if (m_restore) { + m_position = m_position_value; + } + } + + private: + int &m_position; + int m_position_value; + bool m_restore = true; + }; + + bool HasMoreTokens(); + void Advance(); + void TakeBack(); + bool ConsumeToken(clang::tok::TokenKind kind); + template bool ConsumeToken(Ts... kinds); + Bookmark SetBookmark(); + int GetCurrentPosition(); + clang::Token &Peek(); + llvm::Optional ParseFunctionImpl(bool expect_return_type); + + // Parses functions returning function pointers 'string (*f(int x))(float y)' + llvm::Optional ParseFuncPtr(bool expect_return_type); + + bool ConsumeArguments(); + + bool ConsumeAnonymousNamespace(); + bool ConsumeBrackets(clang::tok::TokenKind left, clang::tok::TokenKind right); + + bool ConsumeOperator(); + + // Skips 'const' and 'volatile' + void SkipTypeQualifiers(); + + // Skips 'const', 'volatile', '&', '&&' at the end of the function.
+ void SkipFunctionQualifiers(); + + bool ConsumeBuiltinType(); + + void SkipPtrsAndRefs(); + + // Consume through things like 'const * const &' + bool ConsumePtrsAndRefs(); + bool ConsumeTypename(); + + llvm::Optional ParseFullNameImpl(); + llvm::StringRef GetTextForRange(const Range &range); + void ExtractTokens(); +}; + +} // namespace lldb_private + +#endif // liblldb_CPlusPlusNameParser_h_ \ No newline at end of file Index: source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp =================================================================== --- /dev/null +++ source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp @@ -0,0 +1,548 @@ +//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "CPlusPlusNameParser.h" + +#include + +#include "clang/Basic/IdentifierTable.h" +#include "llvm/Support/Threading.h" + +using namespace lldb; +using namespace lldb_private; +using llvm::Optional; +using llvm::None; +using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction; +using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName; +namespace tok = clang::tok; + +llvm::Optional +CPlusPlusNameParser::ParseAsFunctionDefinition() { + m_next_token_index = 0; + llvm::Optional result(None); + { + Bookmark start_position = SetBookmark(); + result = ParseFunctionImpl(false); + if (result.hasValue() && !HasMoreTokens()) + return result; + } + + result = ParseFuncPtr(true); + if (result.hasValue()) + return result; + + result = ParseFunctionImpl(true); + return result; +} + +llvm::Optional CPlusPlusNameParser::ParseAsFullName() { + m_next_token_index = 0; + llvm::Optional name_ranges = ParseFullNameImpl(); + if (!name_ranges.hasValue()) + return None; + ParsedName result; + 
result.m_basename = GetTextForRange(name_ranges.getValue().m_basename_range); + result.m_context = GetTextForRange(name_ranges.getValue().m_context_range); + return result; +} + +bool CPlusPlusNameParser::HasMoreTokens() { + return m_next_token_index < static_cast(m_tokens.size()); +} + +void CPlusPlusNameParser::Advance() { ++m_next_token_index; } + +void CPlusPlusNameParser::TakeBack() { --m_next_token_index; } + +bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) { + if (!HasMoreTokens()) + return false; + + if (!Peek().is(kind)) + return false; + + Advance(); + return true; +} + +template bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { + if (!HasMoreTokens()) + return false; + + if (!Peek().isOneOf(kinds...)) + return false; + + Advance(); + return true; +} + +CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() { + return Bookmark(m_next_token_index); +} + +int CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; } + +clang::Token &CPlusPlusNameParser::Peek() { + assert(HasMoreTokens()); + return m_tokens[m_next_token_index]; +} + +llvm::Optional +CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) { + Bookmark start_position = SetBookmark(); + if (expect_return_type) { + // Consume return type. 
+ if (!ConsumeTypename()) + return None; + } + + auto maybe_name = ParseFullNameImpl(); + if (!maybe_name.hasValue()) { + return None; + } + + int argument_start = GetCurrentPosition(); + if (!ConsumeArguments()) { + return None; + } + + int qualifiers_start = GetCurrentPosition(); + SkipFunctionQualifiers(); + int end_position = GetCurrentPosition(); + + ParsedFunction result; + result.m_name.m_basename = + GetTextForRange(maybe_name.getValue().m_basename_range); + result.m_name.m_context = + GetTextForRange(maybe_name.getValue().m_context_range); + result.m_arguments = GetTextForRange(Range(argument_start, qualifiers_start)); + result.m_qualifiers = GetTextForRange(Range(qualifiers_start, end_position)); + start_position.Remove(); + return result; +} + +// Parses functions returning function pointers 'string (*f(int x))(float y)' +llvm::Optional +CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) { + Bookmark start_position = SetBookmark(); + if (expect_return_type) { + // Consume return type. 
+ if (!ConsumeTypename()) + return None; + } + + if (!ConsumeToken(tok::l_paren)) + return None; + if (!ConsumePtrsAndRefs()) + return None; + + { + Bookmark before_inner_function_pos = SetBookmark(); + auto maybe_inner_function_name = ParseFunctionImpl(false); + if (maybe_inner_function_name.hasValue()) + if (ConsumeToken(tok::r_paren)) + if (ConsumeArguments()) { + SkipFunctionQualifiers(); + start_position.Remove(); + before_inner_function_pos.Remove(); + return maybe_inner_function_name; + } + } + + auto maybe_inner_function_ptr_name = ParseFuncPtr(false); + if (maybe_inner_function_ptr_name.hasValue()) + if (ConsumeToken(tok::r_paren)) + if (ConsumeArguments()) { + SkipFunctionQualifiers(); + start_position.Remove(); + return maybe_inner_function_ptr_name; + } + return None; +} + +bool CPlusPlusNameParser::ConsumeArguments() { + return ConsumeBrackets(tok::l_paren, tok::r_paren); +} + +bool CPlusPlusNameParser::ConsumeAnonymousNamespace() { + Bookmark start_position = SetBookmark(); + if (!ConsumeToken(tok::l_paren)) { + return false; + } + static ConstString g_anonymous("anonymous"); + if (HasMoreTokens() && Peek().is(tok::identifier) && + Peek().getIdentifierInfo()->getName() == g_anonymous.GetStringRef()) { + Advance(); + } else { + return false; + } + + if (!ConsumeToken(tok::kw_namespace)) { + return false; + } + + if (!ConsumeToken(tok::r_paren)) { + return false; + } + start_position.Remove(); + return true; +} + +bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left, + tok::TokenKind right) { + Bookmark start_position = SetBookmark(); + if (!HasMoreTokens() || Peek().getKind() != left) + return false; + tok::TokenKind prev_kind = Peek().getKind(); + Advance(); + + int counter = 1; + while (HasMoreTokens() && counter > 0) { + tok::TokenKind kind = Peek().getKind(); + if (kind == right) + --counter; + else if (kind == left) + ++counter; + // Handle special case of operator>>() in templates. 
+ else if (right == tok::greater) { + if (kind == tok::greatergreater && prev_kind != tok::kw_operator) + counter -= 2; + } + prev_kind = kind; + Advance(); + } + + assert(counter >= 0); + if (counter > 0) { + return false; + } + start_position.Remove(); + return true; +} + +bool CPlusPlusNameParser::ConsumeOperator() { + Bookmark start_position = SetBookmark(); + if (!ConsumeToken(tok::kw_operator)) + return false; + + if (!HasMoreTokens()) { + return false; + } + + const auto &token = Peek(); + switch (token.getKind()) { + case tok::kw_new: + case tok::kw_delete: + // This is 'new' or 'delete' operators. + Advance(); + // Check for array new/delete. + if (HasMoreTokens() && Peek().is(tok::l_square)) { + // Consume the '[' and ']'. + if (!ConsumeBrackets(tok::l_square, tok::r_square)) + return false; + } + break; + +#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \ + case tok::Token: \ + Advance(); \ + break; +#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly) +#include "clang/Basic/OperatorKinds.def" +#undef OVERLOADED_OPERATOR +#undef OVERLOADED_OPERATOR_MULTI + + case tok::l_paren: + // Call operator consume '(' ... ')'. + if (ConsumeBrackets(tok::l_paren, tok::r_paren)) + break; + return false; + + case tok::l_square: + // This is a [] operator. + // Consume the '[' and ']'. + if (ConsumeBrackets(tok::l_square, tok::r_square)) + break; + return false; + + default: + // This might be a cast operator. + if (ConsumeTypename()) + break; + return false; + } + start_position.Remove(); + return true; +} + +// Skips 'const' and 'volatile' +void CPlusPlusNameParser::SkipTypeQualifiers() { + while (ConsumeToken(tok::kw_const, tok::kw_volatile)) + ; +} + +// Skips 'const', 'volatile', '&', '&&' in the end of the function. 
+void CPlusPlusNameParser::SkipFunctionQualifiers() { + while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp)) + ; +} + +bool CPlusPlusNameParser::ConsumeBuiltinType() { + bool result = false; + bool continue_parsing = true; + // Built-in types can be made of a few keywords + // like 'unsigned long long int'. This function + // consumes all built-in type keywords without + // checking if they make sense like 'unsigned char void'. + while (continue_parsing && HasMoreTokens()) { + switch (Peek().getKind()) { + case tok::kw_short: + case tok::kw_long: + case tok::kw___int64: + case tok::kw___int128: + case tok::kw_signed: + case tok::kw_unsigned: + case tok::kw_void: + case tok::kw_char: + case tok::kw_int: + case tok::kw_half: + case tok::kw_float: + case tok::kw_double: + case tok::kw___float128: + case tok::kw_wchar_t: + case tok::kw_bool: + case tok::kw_char16_t: + case tok::kw_char32_t: + result = true; + Advance(); + break; + default: + continue_parsing = false; + break; + } + } + return result; +} + +void CPlusPlusNameParser::SkipPtrsAndRefs() { + // Ignoring result. + ConsumePtrsAndRefs(); +} + +// Consume through things like 'const * const &' +bool CPlusPlusNameParser::ConsumePtrsAndRefs() { + bool found = false; + SkipTypeQualifiers(); + while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const, + tok::kw_volatile)) { + found = true; + SkipTypeQualifiers(); + } + return found; +} + +bool CPlusPlusNameParser::ConsumeTypename() { + Bookmark start_position = SetBookmark(); + SkipTypeQualifiers(); + if (!ConsumeBuiltinType()) { + if (!ParseFullNameImpl().hasValue()) + return false; + } + SkipPtrsAndRefs(); + start_position.Remove(); + return true; +} + +llvm::Optional +CPlusPlusNameParser::ParseFullNameImpl() { + // Name parsing state machine. 
+ enum class State { + Beginning, // start of the name + AfterTwoColons, // right after :: + AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+) + AfterTemplate, // right after template brackets () + AfterOperator, // right after name of C++ operator + }; + + Bookmark start_position = SetBookmark(); + State state = State::Beginning; + bool continue_parsing = true; + int last_coloncolon_position = -1; + + while (continue_parsing && HasMoreTokens()) { + const auto &token = Peek(); + switch (token.getKind()) { + case tok::identifier: // Just a name. + if (state != State::Beginning && state != State::AfterTwoColons) { + continue_parsing = false; + break; + } + Advance(); + state = State::AfterIdentifier; + break; + case tok::l_paren: { + if (state == State::Beginning || state == State::AfterTwoColons) { + // (anonymous namespace) + if (ConsumeAnonymousNamespace()) { + state = State::AfterIdentifier; + break; + } + } + + // Type declared inside a function 'func()::Type' + if (state != State::AfterIdentifier && state != State::AfterTemplate && + state != State::AfterOperator) { + continue_parsing = false; + break; + } + Bookmark l_paren_position = SetBookmark(); + // Consume the '(' ... ') const'. + if (!ConsumeArguments()) { + continue_parsing = false; + break; + } + SkipFunctionQualifiers(); + + // Consume '::' + int coloncolon_position = GetCurrentPosition(); + if (!ConsumeToken(tok::coloncolon)) { + continue_parsing = false; + break; + } + l_paren_position.Remove(); + last_coloncolon_position = coloncolon_position; + state = State::AfterTwoColons; + break; + } + case tok::coloncolon: // Type nesting delimiter. + if (state != State::Beginning && state != State::AfterIdentifier && + state != State::AfterTemplate) { + continue_parsing = false; + break; + } + last_coloncolon_position = GetCurrentPosition(); + Advance(); + state = State::AfterTwoColons; + break; + case tok::less: // Template brackets. 
+ if (state != State::AfterIdentifier && state != State::AfterOperator) { + continue_parsing = false; + break; + } + if (!ConsumeBrackets(tok::less, tok::greater)) { + continue_parsing = false; + break; + } + state = State::AfterTemplate; + break; + case tok::kw_operator: // C++ operator overloading. + if (state != State::Beginning && state != State::AfterTwoColons) { + continue_parsing = false; + break; + } + if (!ConsumeOperator()) { + continue_parsing = false; + break; + } + state = State::AfterOperator; + break; + case tok::tilde: // Destructor. + if (state != State::Beginning && state != State::AfterTwoColons) { + continue_parsing = false; + break; + } + Advance(); + if (ConsumeToken(tok::identifier)) { + state = State::AfterIdentifier; + } else { + TakeBack(); + continue_parsing = false; + } + break; + default: + continue_parsing = false; + break; + } + } + + if (state == State::AfterIdentifier || state == State::AfterOperator || + state == State::AfterTemplate) { + ParsedNameRanges result; + if (last_coloncolon_position != -1) { + result.m_context_range = + Range(start_position.GetSavedPosition(), last_coloncolon_position); + result.m_basename_range = + Range(last_coloncolon_position + 1, GetCurrentPosition()); + } else { + result.m_basename_range = + Range(start_position.GetSavedPosition(), GetCurrentPosition()); + } + start_position.Remove(); + return result; + } else { + return None; + } +} + +llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) { + if (range.empty()) + return llvm::StringRef(); + assert(range.m_begin_index < range.m_end_index); + assert(range.m_begin_index < (int)m_tokens.size()); + assert(range.m_end_index <= (int)m_tokens.size()); + clang::Token &first_token = m_tokens[range.m_begin_index]; + clang::Token &last_token = m_tokens[range.m_end_index - 1]; + clang::SourceLocation begin_location = first_token.getLocation(); + clang::SourceLocation end_location = last_token.getLocation(); + const char *begin_ptr = 
m_text.data() + begin_location.getRawEncoding(); + const char *end_ptr = + m_text.data() + end_location.getRawEncoding() + last_token.getLength(); + return llvm::StringRef(begin_ptr, end_ptr - begin_ptr); +} + +static const clang::LangOptions &GetLangOptions() { + static clang::LangOptions g_options; + static llvm::once_flag g_once_flag; + llvm::call_once(g_once_flag, []() { + g_options.LineComment = true; + g_options.C99 = true; + g_options.C11 = true; + g_options.CPlusPlus = true; + g_options.CPlusPlus11 = true; + g_options.CPlusPlus14 = true; + g_options.CPlusPlus1z = true; + }); + return g_options; +} + +static void LookupIdentifier(clang::Token &token) { + static clang::IdentifierTable g_id_table(GetLangOptions()); + // Currently we don't expect this to be used by several threads at + // the same time, but we want to protect static IdentifierTable anyway. + // We want MethodName to be easy to use. + static std::mutex g_mutex; + + if (token.is(clang::tok::raw_identifier)) { + std::lock_guard guard(g_mutex); + clang::IdentifierInfo &info = g_id_table.get(token.getRawIdentifier()); + token.setIdentifierInfo(&info); + token.setKind(info.getTokenID()); + } +} + +void CPlusPlusNameParser::ExtractTokens() { + clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(), + m_text.data(), m_text.data() + m_text.size()); + clang::Token token; + for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof); + lexer.LexFromRawLexer(token)) { + LookupIdentifier(token); + m_tokens.push_back(token); + } +} Index: unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp =================================================================== --- unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp +++ unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp @@ -10,31 +10,130 @@ #include "gtest/gtest.h" #include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h" +#include "lldb/Core/StreamFile.h" using namespace lldb_private; -TEST(CPlusPlusLanguage, MethodName) { 
+TEST(CPlusPlusLanguage, MethodNameParsing) { struct TestCase { std::string input; std::string context, basename, arguments, qualifiers, scope_qualified_name; }; TestCase test_cases[] = { + {"void f(int)", "", "f", "(int)", "", "f"}, + {"main(int, char *[]) ", "", "main", "(int, char *[])", "", "main"}, {"foo::bar(baz)", "foo", "bar", "(baz)", "", "foo::bar"}, + {"foo::~bar(baz)", "foo", "~bar", "(baz)", "", "foo::~bar"}, + + // Operators {"std::basic_ostream >& " "std::operator<< >" "(std::basic_ostream >&, char const*)", "std", "operator<< >", "(std::basic_ostream >&, char const*)", "", - "std::operator<< >"}}; + "std::operator<< >"}, + {"operator delete[](void*, clang::ASTContext const&, unsigned long)", "", + "operator delete[]", "(void*, clang::ASTContext const&, unsigned long)", + "", "operator delete[]"}, + {"llvm::Optional::operator bool() const", + "llvm::Optional", "operator bool", "()", "const", + "llvm::Optional::operator bool"}, + {"(anonymous namespace)::FactManager::operator[](unsigned short)", + "(anonymous namespace)::FactManager", "operator[]", "(unsigned short)", + "", "(anonymous namespace)::FactManager::operator[]"}, + {"const int& std::map>::operator[](short) const", + "std::map>", "operator[]", "(short)", "const", + "std::map>::operator[]"}, + {"CompareInsn::operator()(llvm::StringRef, InsnMatchEntry const&)", + "CompareInsn", "operator()", "(llvm::StringRef, InsnMatchEntry const&)", + "", "CompareInsn::operator()"}, + {"llvm::Optional::operator*() const &", + "llvm::Optional", "operator*", "()", "const &", + "llvm::Optional::operator*"}, + // Internal classes + {"operator<<(Cls, Cls)::Subclass::function()", + "operator<<(Cls, Cls)::Subclass", "function", "()", "", + "operator<<(Cls, Cls)::Subclass::function"}, + {"SAEC::checkFunction(context&) const::CallBack::CallBack(int)", + "SAEC::checkFunction(context&) const::CallBack", "CallBack", "(int)", "", + "SAEC::checkFunction(context&) const::CallBack::CallBack"}, + // Anonymous namespace + 
{"XX::(anonymous namespace)::anon_class::anon_func() const", + "XX::(anonymous namespace)::anon_class", "anon_func", "()", "const", + "XX::(anonymous namespace)::anon_class::anon_func"}, + + // Function pointers + {"string (*f(vector&&))(float)", "", "f", "(vector&&)", "", + "f"}, + {"void (*&std::_Any_data::_M_access())()", "std::_Any_data", + "_M_access", "()", "", + "std::_Any_data::_M_access"}, + {"void (*(*(*(*(*(*(*(* const&func1(int))())())())())())())())()", "", + "func1", "(int)", "", "func1"}, + + // Double >> in template + {"void llvm::PM>::" + "addPass(llvm::VP)", + "llvm::PM>", "addPass", + "(llvm::VP)", "", + "llvm::PM>::" + "addPass"}, + {"void std::vector >" + "::_M_emplace_back_aux(Class const&)", + "std::vector >", + "_M_emplace_back_aux", "(Class const&)", "", + "std::vector >::" + "_M_emplace_back_aux"}, + {"unsigned long llvm::countTrailingOnes" + "(unsigned int, llvm::ZeroBehavior)", + "llvm", "countTrailingOnes", + "(unsigned int, llvm::ZeroBehavior)", "", + "llvm::countTrailingOnes"}}; for (const auto &test : test_cases) { CPlusPlusLanguage::MethodName method(ConstString(test.input)); - EXPECT_TRUE(method.IsValid()); - EXPECT_EQ(test.context, method.GetContext()); - EXPECT_EQ(test.basename, method.GetBasename()); - EXPECT_EQ(test.arguments, method.GetArguments()); - EXPECT_EQ(test.qualifiers, method.GetQualifiers()); + EXPECT_TRUE(method.IsValid()) << test.input; + EXPECT_EQ(test.context, method.GetContext()) << method.GetContext(); + EXPECT_EQ(test.basename, method.GetBasename()) << method.GetBasename(); + EXPECT_EQ(test.arguments, method.GetArguments()) << method.GetArguments(); + EXPECT_EQ(test.qualifiers, method.GetQualifiers()) + << method.GetQualifiers(); EXPECT_EQ(test.scope_qualified_name, method.GetScopeQualifiedName()); } } + +TEST(CPlusPlusLanguage, ExtractContextAndIdentifier) { + struct TestCase { + std::string input; + std::string context, basename; + }; + + TestCase test_cases[] = { + {"main", "", "main"}, + {"foo01::bar", 
"foo01", "bar"}, + {"foo::~bar", "foo", "~bar"}, + {"std::vector::push_back", "std::vector", "push_back"}, + {"operator<<(Cls, Cls)::Subclass::function", + "operator<<(Cls, Cls)::Subclass", "function"}, + {"std::vector>" + "::_M_emplace_back_aux", + "std::vector>", + "_M_emplace_back_aux"}}; + + llvm::StringRef context, basename; + for (const auto &test : test_cases) { + EXPECT_TRUE(CPlusPlusLanguage::ExtractContextAndIdentifier( + test.input.c_str(), context, basename)) + << test.input; + EXPECT_EQ(test.context, context) << context; + EXPECT_EQ(test.basename, basename) << basename; + } + + EXPECT_FALSE(CPlusPlusLanguage::ExtractContextAndIdentifier("void", context, + basename)); + EXPECT_FALSE( + CPlusPlusLanguage::ExtractContextAndIdentifier("321", context, basename)); + EXPECT_FALSE( + CPlusPlusLanguage::ExtractContextAndIdentifier("", context, basename)); +}