Index: include/lldb/Interpreter/Args.h
===================================================================
--- include/lldb/Interpreter/Args.h
+++ include/lldb/Interpreter/Args.h
@@ -19,5 +19,6 @@
 
 // Other libraries and framework includes
+#include "llvm/ADT/StringRef.h"
 // Project includes
 #include "lldb/lldb-private-types.h"
 #include "lldb/lldb-types.h"
@@ -77,7 +78,7 @@
     ///
     /// @see Args::SetCommandString(const char *)
     //------------------------------------------------------------------
-    Args (const char *command = NULL);
+    Args (llvm::StringRef command = llvm::StringRef());
 
     Args (const char *command, size_t len);
 
@@ -108,7 +109,7 @@
     /// that can be accessed via the accessor functions.
     ///
     /// @param[in] command
-    ///     A NULL terminated command that will be copied and split up
+    ///     A command StringRef that will be copied and split up
     ///     into arguments.
     ///
     /// @see Args::GetArgumentCount() const
@@ -118,7 +119,7 @@
     /// @see Args::Unshift (const char *)
     //------------------------------------------------------------------
     void
-    SetCommandString (const char *command);
+    SetCommandString (llvm::StringRef command);
 
     void
     SetCommandString (const char *command, size_t len);
@@ -449,6 +450,9 @@
 
     void
     UpdateArgvFromArgs ();
+
+    llvm::StringRef
+    ParseSingleArgument (llvm::StringRef command);
 };
 
 } // namespace lldb_private
Index: source/Interpreter/Args.cpp
===================================================================
--- source/Interpreter/Args.cpp
+++ source/Interpreter/Args.cpp
@@ -33,13 +33,12 @@
 //----------------------------------------------------------------------
 // Args constructor
 //----------------------------------------------------------------------
-Args::Args (const char *command) :
+Args::Args (llvm::StringRef command) :
     m_args(),
     m_argv(),
     m_args_quote_char()
 {
-    if (command)
-        SetCommandString (command);
+    SetCommandString (command);
 }
 
 
@@ -48,8 +47,7 @@
     m_argv(),
     m_args_quote_char()
 {
-    if (command && len)
-        SetCommandString (command, len);
+    SetCommandString (llvm::StringRef(command, len));
 }
 
 //----------------------------------------------------------------------
@@ -149,211 +147,162 @@
 void
 Args::SetCommandString (const char *command, size_t len)
 {
-    // Use std::string to make sure we get a NULL terminated string we can use
-    // as "command" could point to a string within a large string....
-    std::string null_terminated_command(command, len);
-    SetCommandString(null_terminated_command.c_str());
+    SetCommandString(llvm::StringRef(command, len));
 }
 
-void
-Args::SetCommandString (const char *command)
+// A helper function for argument parsing.
+// Parses the initial part of the first argument using normal double quote rules:
+// backslash escapes the double quote and itself. The parsed string is appended to the second
+// argument. The function returns the unparsed portion of the string, starting at the closing
+// quote.
+static llvm::StringRef
+ParseDoubleQuotes(llvm::StringRef quoted, std::string &result)
 {
-    m_args.clear();
-    m_argv.clear();
-    m_args_quote_char.clear();
+    // Inside double quotes, '\' and '"' are special.
+    static const char *k_escapable_characters = "\"\\";
+    while (true)
+    {
+        // Skip over regular characters and append them.
+        size_t regular = quoted.find_first_of(k_escapable_characters);
+        result += quoted.substr(0, regular);
+        quoted = quoted.substr(regular);
+
+        // If we have reached the end of string or the closing quote, we're done.
+        if (quoted.empty() || quoted.front() == '"')
+            break;
+
+        // We have found a backslash.
+        quoted = quoted.drop_front();
+
+        if (quoted.empty())
+        {
+            // A lone backslash at the end of string, let's just append it.
+            result += '\\';
+            break;
+        }
+
+        // If the character after the backslash is not a whitelisted escapable character, we
+        // leave the character sequence untouched.
+        if (strchr(k_escapable_characters, quoted.front()) == nullptr)
+            result += '\\';
+
+        result += quoted.front();
+        quoted = quoted.drop_front();
+    }
+
+    return quoted;
+}
 
-    if (command && command[0])
+// A helper function for SetCommandString.
+// Parses a single argument from the command string, processing quotes and backslashes in a
+// shell-like manner. The parsed argument is appended to the m_args array. The function returns
+// the unparsed portion of the string, starting at the first unquoted, unescaped whitespace
+// character.
+llvm::StringRef
+Args::ParseSingleArgument(llvm::StringRef command)
+{
+    // Argument can be split into multiple discontiguous pieces,
+    // for example:
+    //  "Hello ""World"
+    // this would result in a single argument "Hello World" (without
+    // the quotes) since the quotes would be removed and there is
+    // no space between the strings.
+
+    std::string arg;
+
+    // Since we can have multiple quotes that form a single command
+    // in a command like: "Hello "world'!' (which will make a single
+    // argument "Hello world!") we remember the first quote character
+    // we encounter and use that for the quote character.
+    char first_quote_char = '\0';
+
+    bool arg_complete = false;
+    do
     {
-        static const char *k_space_separators = " \t";
-        static const char *k_escapable_characters = " \t\\'\"";
-        const char *arg_end = nullptr;
-        const char *arg_pos;
-        for (arg_pos = command;
-             arg_pos && arg_pos[0];
-             arg_pos = arg_end)
+        // Skip over regular characters and append them.
+        size_t regular = command.find_first_of(" \t\"'`\\");
+        arg += command.substr(0, regular);
+        command = command.substr(regular);
+
+        if (command.empty())
+            break;
+
+        char special = command.front();
+        command = command.drop_front();
+        switch (special)
         {
-            // Skip any leading space separators
-            const char *arg_start = ::strspn (arg_pos, k_space_separators) + arg_pos;
-
-            // If there were only space separators to the end of the line, then
-            // we're done.
-            if (*arg_start == '\0')
+        case '\\':
+            if (command.empty())
+            {
+                arg += '\\';
                 break;
+            }
 
-            // Arguments can be split into multiple discontiguous pieces,
-            // for example:
-            //  "Hello ""World"
-            // this would result in a single argument "Hello World" (without/
-            // the quotes) since the quotes would be removed and there is
-            // not space between the strings. So we need to keep track of the
-            // current start of each argument piece in "arg_piece_start"
-            const char *arg_piece_start = arg_start;
-            arg_pos = arg_piece_start;
-
-            std::string arg;
-            // Since we can have multiple quotes that form a single command
-            // in a command like: "Hello "world'!' (which will make a single
-            // argument "Hello world!") we remember the first quote character
-            // we encounter and use that for the quote character.
-            char first_quote_char = '\0';
-            char quote_char = '\0';
-            bool arg_complete = false;
-
-            do
-            {
-                arg_end = ::strcspn (arg_pos, k_escapable_characters) + arg_pos;
+            // If the character after the backslash is not a whitelisted escapable character, we
+            // leave the character sequence untouched.
+            static const char *k_escapable_characters = " \t\\'\"`";
+            if (strchr(k_escapable_characters, command.front()) == nullptr)
+                arg += '\\';
 
-                switch (arg_end[0])
-                {
-                default:
-                    assert (!"Unhandled case statement, we must handle this...");
-                    break;
+            arg += command.front();
+            command = command.drop_front();
 
-                case '\0':
-                    // End of C string
-                    if (arg_piece_start && arg_piece_start[0])
-                        arg.append (arg_piece_start);
-                    arg_complete = true;
-                    break;
-                case '\\':
-                    // Backslash character
-                    switch (arg_end[1])
-                    {
-                        case '\0':
-                            arg.append (arg_piece_start);
-                            ++arg_end;
-                            arg_complete = true;
-                            break;
-
-                        default:
-                            // Only consider this two-character sequence an escape sequence if we're unquoted and
-                            // the character after the backslash is a whitelisted escapable character.  Otherwise
-                            // leave the character sequence untouched.
-                            if (quote_char == '\0' && (nullptr != strchr(k_escapable_characters, arg_end[1])))
-                            {
-                                arg.append (arg_piece_start, arg_end - arg_piece_start);
-                                arg.append (arg_end + 1, 1);
-                                arg_pos = arg_end + 2;
-                                arg_piece_start = arg_pos;
-                            }
-                            else
-                                arg_pos = arg_end + 2;
-                            break;
-                    }
-                    break;
-                case '"':
-                case '\'':
-                case '`':
-                    // Quote characters
-                    if (quote_char)
-                    {
-                        // We found a quote character while inside a quoted
-                        // character argument. If it matches our current quote
-                        // character, this ends the effect of the quotes. If it
-                        // doesn't we ignore it.
-                        if (quote_char == arg_end[0])
-                        {
-                            arg.append (arg_piece_start, arg_end - arg_piece_start);
-                            // Clear the quote character and let parsing
-                            // continue (we need to watch for things like:
-                            // "Hello ""World"
-                            // "Hello "World
-                            // "Hello "'World'
-                            // All of which will result in a single argument "Hello World"
-                            quote_char = '\0'; // Note that we are no longer inside quotes
-                            arg_pos = arg_end + 1; // Skip the quote character
-                            arg_piece_start = arg_pos; // Note we are starting from later in the string
-                        }
-                        else
-                        {
-                            // different quote, skip it and keep going
-                            arg_pos = arg_end + 1;
-                        }
-                    }
-                    else
-                    {
-                        // We found the start of a quote scope.
-                        // Make sure there isn't a string that precedes
-                        // the start of a quote scope like:
-                        //  Hello" World"
-                        // If so, then add the "Hello" to the arg
-                        if (arg_end > arg_piece_start)
-                            arg.append (arg_piece_start, arg_end - arg_piece_start);
-
-                        // Enter into a quote scope
-                        quote_char = arg_end[0];
-
-                        if (first_quote_char == '\0')
-                            first_quote_char = quote_char;
+            break;
 
-                        arg_pos = arg_end;
-                        ++arg_pos; // Skip the quote character
-                        arg_piece_start = arg_pos; // Note we are starting from later in the string
-
-                        // Skip till the next quote character
-                        const char *end_quote = ::strchr (arg_piece_start, quote_char);
-                        while (end_quote && end_quote[-1] == '\\')
-                        {
-                            // Don't skip the quote character if it is
-                            // preceded by a '\' character
-                            end_quote = ::strchr (end_quote + 1, quote_char);
-                        }
-
-                        if (end_quote)
-                        {
-                            if (end_quote > arg_piece_start)
-                                arg.append (arg_piece_start, end_quote - arg_piece_start);
+        case ' ':
+        case '\t':
+            // We are not inside any quotes, we just found a space after an
+            // argument. We are done.
+            arg_complete = true;
+            break;
 
-                            // If the next character is a space or the end of
-                            // string, this argument is complete...
-                            if (end_quote[1] == ' ' || end_quote[1] == '\t' || end_quote[1] == '\0')
-                            {
-                                arg_complete = true;
-                                arg_end = end_quote + 1;
-                            }
-                            else
-                            {
-                                arg_pos = end_quote + 1;
-                                arg_piece_start = arg_pos;
-                            }
-                            quote_char = '\0';
-                        }
-                        else
-                        {
-                            // Consume the rest of the string as there was no terminating quote
-                            arg.append(arg_piece_start);
-                            arg_end = arg_piece_start + strlen(arg_piece_start);
-                            arg_complete = true;
-                        }
-                    }
-                    break;
+        case '"':
+        case '\'':
+        case '`':
+            // We found the start of a quote scope.
+            if (first_quote_char == '\0')
+                first_quote_char = special;
 
-                case ' ':
-                case '\t':
-                    if (quote_char)
-                    {
-                        // We are currently processing a quoted character and found
-                        // a space character, skip any spaces and keep trying to find
-                        // the end of the argument.
-                        arg_pos = ::strspn (arg_end, k_space_separators) + arg_end;
-                    }
-                    else
-                    {
-                        // We are not inside any quotes, we just found a space after an
-                        // argument
-                        if (arg_end > arg_piece_start)
-                            arg.append (arg_piece_start, arg_end - arg_piece_start);
-                        arg_complete = true;
-                    }
-                    break;
-                }
-            } while (!arg_complete);
+            if (special == '"')
+                command = ParseDoubleQuotes(command, arg);
+            else
+            {
+                // For single quotes and backticks, we simply skip ahead to the matching
+                // quote character (or the end of the string).
+                size_t quoted = command.find(special);
+                arg += command.substr(0, quoted);
+                command = command.substr(quoted);
+            }
 
-            m_args.push_back(arg);
-            m_args_quote_char.push_back (first_quote_char);
+            // If we found a closing quote, skip it.
+            if (! command.empty())
+                command = command.drop_front();
+
+            break;
         }
-        UpdateArgvFromArgs();
+    } while (!arg_complete);
+
+    m_args.push_back(arg);
+    m_args_quote_char.push_back (first_quote_char);
+    return command;
+}
+
+void
+Args::SetCommandString (llvm::StringRef command)
+{
+    m_args.clear();
+    m_argv.clear();
+    m_args_quote_char.clear();
+
+    static const char *k_space_separators = " \t";
+    command = command.ltrim(k_space_separators);
+    while (!command.empty())
+    {
+        command = ParseSingleArgument(command);
+        command = command.ltrim(k_space_separators);
     }
+
+    UpdateArgvFromArgs();
 }
 
 void
Index: test/settings/quoting/Makefile
===================================================================
--- /dev/null
+++ test/settings/quoting/Makefile
@@ -0,0 +1,5 @@
+LEVEL = ../../make
+
+C_SOURCES := main.c
+
+include $(LEVEL)/Makefile.rules
Index: test/settings/quoting/TestQuoting.py
===================================================================
--- /dev/null
+++ test/settings/quoting/TestQuoting.py
@@ -0,0 +1,73 @@
+"""
+Test quoting of arguments to lldb commands
+"""
+
+import os, time, re
+import unittest2
+import lldb
+from lldbtest import *
+
+class SettingsCommandTestCase(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    @classmethod
+    def classCleanup(cls):
+        """Cleanup the test byproducts."""
+        cls.RemoveTempFile("stdout.txt")
+
+    def test_no_quote(self):
+        self.do_test_args("a b c", "a\0b\0c\0")
+
+    def test_single_quote(self):
+        self.do_test_args("'a b c'", "a b c\0")
+
+    def test_double_quote(self):
+        self.do_test_args('"a b c"', "a b c\0")
+
+    def test_single_quote_escape(self):
+        self.do_test_args("'a b\\' c", "a b\\\0c\0")
+
+    def test_double_quote_escape(self):
+        self.do_test_args('"a b\\" c"', 'a b" c\0')
+
+    def test_double_quote_escape2(self):
+        self.do_test_args('"a b\\\\" c', 'a b\\\0c\0')
+
+    def test_single_in_double(self):
+        self.do_test_args('"a\'b"', "a'b\0")
+
+    def test_double_in_single(self):
+        self.do_test_args("'a\"b'", 'a"b\0')
+
+    def test_combined(self):
+        self.do_test_args('"a b"c\'d e\'', 'a bcd e\0')
+
+    def test_bare_single(self):
+        self.do_test_args("a\\'b", "a'b\0")
+
+    def test_bare_double(self):
+        self.do_test_args('a\\"b', 'a"b\0')
+
+    def do_test_args(self, args_in, args_out):
+        """Test argument parsing. Run the program with args_in. The program dumps its arguments
+        to stdout. Compare the stdout with args_out."""
+        self.buildDefault()
+
+        exe = os.path.join(os.getcwd(), "a.out")
+        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+
+        self.runCmd("process launch -o stdout.txt -- " + args_in)
+
+        with open('stdout.txt', 'r') as f:
+            output = f.read()
+
+        self.RemoveTempFile("stdout.txt")
+
+        self.assertEqual(output, args_out)
+
+if __name__ == '__main__':
+    import atexit
+    lldb.SBDebugger.Initialize()
+    atexit.register(lambda: lldb.SBDebugger.Terminate())
+    unittest2.main()
Index: test/settings/quoting/main.c
===================================================================
--- /dev/null
+++ test/settings/quoting/main.c
@@ -0,0 +1,13 @@
+#include <stdio.h>
+#include <string.h>
+
+/* This program writes its arguments (separated by '\0') to stdout. */
+int
+main(int argc, char const *argv[])
+{
+    int i;
+    for (i = 1; i < argc; ++i)
+        fwrite(argv[i], strlen(argv[i])+1, 1, stdout);
+
+    return 0;
+}