Index: lib/Support/CommandLine.cpp =================================================================== --- lib/Support/CommandLine.cpp +++ lib/Support/CommandLine.cpp @@ -708,6 +708,7 @@ SmallVectorImpl &NewArgv, bool MarkEOLs) { SmallString<128> Token; + bool NonWhitespaceSeenInLine = false; for (size_t I = 0, E = Src.size(); I != E; ++I) { // Consume runs of whitespace. if (Token.empty()) { @@ -719,11 +720,27 @@ } if (I == E) break; + // Skip comment line: a '#' reached while NonWhitespaceSeenInLine is false discards input through the next '\n' or to the end of Src. NOTE(review): confirm the flag is also cleared when '\n' is consumed as inter-token whitespace, not only when it ends a token. + if (!NonWhitespaceSeenInLine && Src[I] == '#') { + ++I; + while (I != E && Src[I] != '\n') + ++I; + if (I == E) + break; + continue; + } + NonWhitespaceSeenInLine = true; } // Backslash escapes the next character. if (I + 1 < E && Src[I] == '\\') { ++I; // Skip the escape. + if (Src[I] == '\n') + continue; // Ignore backslash followed by '\n'. + if (Src[I] == '\r' && I + 1 < E && Src[I + 1] == '\n') { + ++I; + continue; // Ignore backslash followed by \r\n. + } Token.push_back(Src[I]); continue; } @@ -748,6 +765,8 @@ if (!Token.empty()) NewArgv.push_back(Saver.save(StringRef(Token)).data()); Token.clear(); + if (Src[I] == '\n') + NonWhitespaceSeenInLine = false; continue; } @@ -809,6 +828,7 @@ // This is a small state machine to consume characters until it reaches the // end of the source string. enum { INIT, UNQUOTED, QUOTED } State = INIT; + bool NonWhitespaceSeenInLine = false; for (size_t I = 0, E = Src.size(); I != E; ++I) { // INIT state indicates that the current input index is at the start of // the string or between tokens. @@ -819,6 +839,18 @@ NewArgv.push_back(nullptr); continue; } + + // Skip comment line: a '#' reached while NonWhitespaceSeenInLine is false discards input through the next '\n' or to the end of Src. NOTE(review): confirm the flag is also cleared when '\n' is consumed as inter-token whitespace, not only when it ends a token.
+ if (!NonWhitespaceSeenInLine && Src[I] == '#') { + ++I; + while (I != E && Src[I] != '\n') + ++I; + if (I == E) + break; + continue; + } + NonWhitespaceSeenInLine = true; + if (Src[I] == '"') { State = QUOTED; continue; @@ -841,9 +873,12 @@ NewArgv.push_back(Saver.save(StringRef(Token)).data()); Token.clear(); State = INIT; - // Mark the end of lines in response files - if (MarkEOLs && Src[I] == '\n') - NewArgv.push_back(nullptr); + if (Src[I] == '\n') { + NonWhitespaceSeenInLine = false; + // Mark the end of lines in response files + if (MarkEOLs) + NewArgv.push_back(nullptr); + } continue; } if (Src[I] == '"') { Index: unittests/Support/CommandLineTest.cpp =================================================================== --- unittests/Support/CommandLineTest.cpp +++ unittests/Support/CommandLineTest.cpp @@ -198,6 +198,56 @@ array_lengthof(Output)); } +TEST(CommandLineTest, TokenizeGNUCommandLineBS) { + const char *Input = "\\"; + const char *Output[1] = { "\\" }; + testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); + + Input = "abc\\"; + Output[0] = "abc\\"; + testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); + + Input = "\\abc"; + Output[0] = "abc"; + testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); + + Input = "abc\\123"; + Output[0] = "abc123"; + testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); + + Input = "abc\\1"; + Output[0] = "abc1"; + testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); + + Input = "abc\\\\123"; + Output[0] = "abc\\123"; + testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); + + Input = "\\\nabc"; + Output[0] = "abc"; + testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); + + Input = "\\\r\nabc"; + Output[0] = "abc"; + testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); + + Input = "abc\\\n123"; + Output[0] = "abc123"; + 
testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); + + Input = "abc\\\r\n123"; + Output[0] = "abc123"; + testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); + + Input = "abc\\\n"; + Output[0] = "abc"; + testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); + + Input = "abc\\\r\n"; + Output[0] = "abc"; + testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output, 1); +} + TEST(CommandLineTest, TokenizeWindowsCommandLine) { const char Input[] = "a\\b c\\\\d e\\\\\"f g\" h\\\"i j\\\\\\\"k \"lmn\" o pqr " "\"st \\\"u\" \\v"; @@ -207,6 +257,38 @@ array_lengthof(Output)); } +TEST(CommandLineTest, TokenizeCommandLineComment) { + for (auto Tokenizer : { cl::TokenizeGNUCommandLine , + cl::TokenizeWindowsCommandLine }) { + const char *Input = "# abc\n" + "123"; + const char *Output[3] = { "123" }; + testCommandLineTokenizer(Tokenizer, Input, Output, 1); + + Input = " # abc\n" + "123"; + Output[0] = "123"; + testCommandLineTokenizer(Tokenizer, Input, Output, 1); + + Input = "123 # abc"; + Output[0] = "123"; + Output[1] = "#"; + Output[2] = "abc"; + testCommandLineTokenizer(Tokenizer, Input, Output, 3); + + Input = "abc\n" + "#123"; + Output[0] = "abc"; + testCommandLineTokenizer(Tokenizer, Input, Output, 1); + + Input = "abc def\n" + "#123"; + Output[0] = "abc"; + Output[1] = "def"; + testCommandLineTokenizer(Tokenizer, Input, Output, 2); + } +} + TEST(CommandLineTest, AliasesWithArguments) { static const size_t ARGC = 3; const char *const Inputs[][ARGC] = {