Index: lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/Makefile =================================================================== --- /dev/null +++ lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/Makefile @@ -0,0 +1,3 @@ +LEVEL = ../../make +C_SOURCES := main.c +include $(LEVEL)/Makefile.rules Index: lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/TestDollarInVariable.py =================================================================== --- /dev/null +++ lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/TestDollarInVariable.py @@ -0,0 +1,5 @@ +from lldbsuite.test import lldbinline +from lldbsuite.test import decorators + +lldbinline.MakeInlineTest(__file__, globals(), None) + Index: lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/main.c =================================================================== --- /dev/null +++ lldb/packages/Python/lldbsuite/test/expression_command/dollar-in-variable/main.c @@ -0,0 +1,21 @@ +// Make sure we correctly handle $ in variable names. + +int main() { + // Some variables that might conflict with our variables below. + int __lldb_expr_result = 2; + int $$foo = 1; + int R0 = 2; + + // Some variables with dollar signs that should work (and shadow + // any built-in LLDB variables). + int $__lldb_expr_result = 11; + int $foo = 12; + int $R0 = 13; + int $0 = 14; + + //%self.expect("expr $__lldb_expr_result", substrs=['(int) $0 = 11']) + //%self.expect("expr $foo", substrs=['(int)', ' = 12']) + //%self.expect("expr $R0", substrs=['(int)', ' = 13']) + //%self.expect("expr int $foo = 123", error=True, substrs=["declaration conflicts"]) + return 0; //%self.expect("expr $0", substrs=['(int)', ' = 14']) +} Index: lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/Makefile =================================================================== --- /dev/null +++ lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/Makefile @@ -0,0 +1,4 @@ +LEVEL = ../../make +CXX_SOURCES := main.cpp +CXX_FLAGS_EXTRA := -finput-charset=UTF-8 -fextended-identifiers +include $(LEVEL)/Makefile.rules Index: lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/TestUnicodeInVariable.py =================================================================== --- /dev/null +++ lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/TestUnicodeInVariable.py @@ -0,0 +1,5 @@ +from lldbsuite.test import lldbinline +from lldbsuite.test import decorators + +lldbinline.MakeInlineTest(__file__, globals(), None) + Index: lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/main.cpp =================================================================== --- /dev/null +++ lldb/packages/Python/lldbsuite/test/expression_command/unicode-in-variable/main.cpp @@ -0,0 +1,17 @@ +// Make sure we correctly handle unicode in variable names. + +struct A { + // We need a member variable in the context that could shadow our local + // variable. If our optimization code fails to handle this, then we won't + // correctly inject our local variable so that it won't get shadowed. + int foob\u00E1r = 2; + int foo() { + int foob\u00E1r = 3; + return foob\u00E1r; //%self.expect("expr foobár", substrs=['(int)', ' = 3']) + } +}; + +int main() { + A a; + return a.foo(); +} Index: lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp =================================================================== --- lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp +++ lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp @@ -9,6 +9,8 @@ #include "ClangExpressionSourceCode.h" #include "clang/Basic/CharInfo.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" #include "llvm/ADT/StringRef.h" #include "Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h" @@ -164,44 +166,87 @@ } } -/// Checks if the expression body contains the given variable as a token. -/// \param body The expression body. -/// \param var The variable token we are looking for. -/// \return True iff the expression body containes the variable as a token. -static bool ExprBodyContainsVar(llvm::StringRef body, llvm::StringRef var) { - assert(var.find_if([](char c) { return !clang::isIdentifierBody(c); }) == - llvm::StringRef::npos && - "variable contains non-identifier chars?"); - - size_t start = 0; - // Iterate over all occurences of the variable string in our expression. - while ((start = body.find(var, start)) != llvm::StringRef::npos) { - // We found our variable name in the expression. Check that the token - // that contains our needle is equal to our variable and not just contains - // the character sequence by accident. - // Prevents situations where we for example inlcude the variable 'FOO' in an - // expression like 'FOObar + 1'. - bool has_characters_before = - start != 0 && clang::isIdentifierBody(body[start - 1]); - bool has_characters_after = - start + var.size() < body.size() && - clang::isIdentifierBody(body[start + var.size()]); - - // Our token just contained the variable name as a substring. Continue - // searching the rest of the expression. - if (has_characters_before || has_characters_after) { - ++start; +namespace { +/// Allows checking if a token is contained in a given expression. +class TokenVerifier { + /// The tokens we found in the expression. + llvm::StringSet<> m_tokens; + +public: + TokenVerifier(std::string body); + /// Returns true iff the given expression body contained a token with the + /// given content. + bool hasToken(llvm::StringRef token) const { + return m_tokens.find(token) != m_tokens.end(); + } +}; +} // namespace + +TokenVerifier::TokenVerifier(std::string body) { + using namespace clang; + + // We only care about tokens and not their original source locations. If we + // move the whole expression to only be in one line we can simplify the + // following code that extracts the token contents. + std::replace(body.begin(), body.end(), '\n', ' '); + std::replace(body.begin(), body.end(), '\r', ' '); + + FileSystemOptions file_opts; + FileManager file_mgr(file_opts, + FileSystem::Instance().GetVirtualFileSystem()); + + // Let's build the actual source code Clang needs and setup some utility + // objects. + llvm::IntrusiveRefCntPtr diag_ids(new DiagnosticIDs()); + llvm::IntrusiveRefCntPtr diags_opts( + new DiagnosticOptions()); + DiagnosticsEngine diags(diag_ids, diags_opts); + clang::SourceManager SM(diags, file_mgr); + auto buf = llvm::MemoryBuffer::getMemBuffer(body); + + FileID FID = SM.createFileID(clang::SourceManager::Unowned, buf.get()); + + // Let's just enable the latest ObjC and C++ which should get most tokens + // right. + LangOptions Opts; + Opts.ObjC = true; + Opts.DollarIdents = true; + Opts.CPlusPlus17 = true; + Opts.LineComment = true; + + Lexer lex(FID, buf.get(), SM, Opts); + + Token token; + bool exit = false; + while (!exit) { + // Returns true if this is the last token we get from the lexer. + exit = lex.LexFromRawLexer(token); + + // Extract the column number which we need to extract the token content. + // Our expression is just one line, so we don't need to handle any line + // numbers here. + bool invalid = false; + unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid); + if (invalid) continue; - } - return true; + // Column numbers start at 1, but indexes in our string start at 0. + --start; + + // Annotations don't have a length, so let's skip them. + if (token.isAnnotation()) + continue; + + // Extract the token string from our source code and store it. + m_tokens.insert(body.substr(start, token.getLength())); } - return false; } static void AddLocalVariableDecls(const lldb::VariableListSP &var_list_sp, StreamString &stream, const std::string &expr, lldb::LanguageType wrapping_language) { + TokenVerifier tokens(expr); + for (size_t i = 0; i < var_list_sp->GetSize(); i++) { lldb::VariableSP var_sp = var_list_sp->GetVariableAtIndex(i); @@ -213,7 +258,7 @@ if (!var_name || var_name == ".block_descriptor") continue; - if (!expr.empty() && !ExprBodyContainsVar(expr, var_name.GetStringRef())) + if (!expr.empty() && !tokens.hasToken(var_name.GetStringRef())) continue; if ((var_name == "self" || var_name == "_cmd") &&