diff --git a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp --- a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -93,6 +93,7 @@ void printDirectiveBody(const char *&First, const char *const End); void printAdjacentMacroArgs(const char *&First, const char *const End); LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); + void maybePrintNewLine(const char *&Last, const char *const End); /// Reports a diagnostic if the diagnostic engine is provided. Always returns /// true at the end. @@ -446,13 +447,15 @@ } } -static void skipWhitespace(const char *&First, const char *const End) { +static size_t skipWhitespace(const char *&First, const char *const End) { + const char *FirstBefore = First; + for (;;) { assert(First <= End); skipOverSpaces(First, End); if (End - First < 2) - return; + break; if (First[0] == '\\' && isVerticalWhitespace(First[1])) { skipNewline(++First, End); @@ -461,21 +464,23 @@ // Check for a non-comment character. if (First[0] != '/') - return; + break; // "// ...". if (First[1] == '/') { skipLineComment(First, End); - return; + break; } // Cannot be a comment. if (First[1] != '*') - return; + break; // "/*...*/". skipBlockComment(First, End); } + + return First - FirstBefore; } void Minimizer::printAdjacentModuleNameParts(const char *&First, @@ -519,7 +524,7 @@ printToNewline(First, End); while (Out.back() == ' ') Out.pop_back(); - put('\n'); + maybePrintNewLine(First, End); } LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, @@ -595,6 +600,12 @@ } } +void Minimizer::maybePrintNewLine(const char *&Last, const char *const End) { + // Only print newline if doing so won't make the output larger than the input. + if (Last != End) + put('\n'); +} + /// Looks for an identifier starting from Last. /// /// Updates "First" to just past the next identifier, if any. Returns true iff @@ -704,15 +715,17 @@ // Be robust to bad macro arguments, since they can show up in disabled // code. Out.resize(Size); - append("(/* invalid */\n"); + maybePrintNewLine(Last, End); skipLine(Last, End); return false; } } - skipWhitespace(Last, End); + size_t WhitespaceLength = skipWhitespace(Last, End); if (Last == End) return false; - if (!isVerticalWhitespace(*Last)) + // Only print space if we actually skipped some whitespace. This prevents + // making the output larger than the input. + if (WhitespaceLength > 0 && !isVerticalWhitespace(*Last)) put(' '); printDirectiveBody(Last, End); First = Last; @@ -887,9 +900,11 @@ bool Minimizer::minimize() { bool Error = minimizeImpl(Input.begin(), Input.end()); + assert(Out.size() <= Input.size() && "Output is not larger than input"); + if (!Error) { - // Add a trailing newline and an EOF on success. - if (!Out.empty() && Out.back() != '\n') + // Add a trailing newline if it won't make the output larger than the input. + if (!Out.empty() && Out.back() != '\n' && Out.size() < Input.size()) Out.push_back('\n'); makeToken(pp_eof); } diff --git a/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp b/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp --- a/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp @@ -94,7 +94,7 @@ ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define MACRO", Out, Tokens)); - EXPECT_STREQ("#define MACRO\n", Out.data()); + EXPECT_STREQ("#define MACRO", Out.data()); ASSERT_EQ(2u, Tokens.size()); ASSERT_EQ(pp_define, Tokens.front().K); } @@ -123,7 +123,7 @@ SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO()", Out)); - EXPECT_STREQ("#define MACRO()\n", Out.data()); + EXPECT_STREQ("#define MACRO()", Out.data()); ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define MACRO(a, b...)", Out)); @@ -131,7 +131,7 @@ ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define MACRO content", Out)); - EXPECT_STREQ("#define MACRO content\n", Out.data()); + EXPECT_STREQ("#define MACRO content", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( "#define MACRO con tent ", Out)); @@ -146,14 +146,14 @@ SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO\n", Out.data()); ASSERT_FALSE( minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out)); - EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data()); + EXPECT_STREQ("#define MACRO\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) { @@ -259,7 +259,7 @@ SmallVector Out; ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out)); - EXPECT_STREQ("#define AND &\n", Out.data()); + EXPECT_STREQ("#define AND&\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n" "&\n",