Index: lib/Format/BreakableToken.h =================================================================== --- lib/Format/BreakableToken.h +++ lib/Format/BreakableToken.h @@ -137,9 +137,9 @@ return Split(StringRef::npos, 0); } - /// \brief Returns if a break before the content at \p LineIndex will be - /// inserted after the whitespace preceding the content has been reformatted. - virtual bool introducesBreakBefore(unsigned LineIndex) const { + /// \brief Returns whether there will be a line break at the start of the + /// token. + virtual bool introducesBreakBeforeToken() const { return false; } @@ -347,7 +347,7 @@ Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, llvm::Regex &CommentPragmasRegex) const override; - bool introducesBreakBefore(unsigned LineIndex) const override; + bool introducesBreakBeforeToken() const override; unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn, Index: lib/Format/BreakableToken.cpp =================================================================== --- lib/Format/BreakableToken.cpp +++ lib/Format/BreakableToken.cpp @@ -605,9 +605,9 @@ } } -bool BreakableBlockComment::introducesBreakBefore(unsigned LineIndex) const { +bool BreakableBlockComment::introducesBreakBeforeToken() const { // A break is introduced when we want delimiters on newline. - return LineIndex == 0 && DelimitersOnNewline && + return DelimitersOnNewline && Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos; } Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -1487,8 +1487,14 @@ if (Current.UnbreakableTailLength >= ColumnLimit) return 0; + unsigned NewBreakPenalty = Current.isStringLiteral() + ? Style.PenaltyBreakString + : Style.PenaltyBreakComment; unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; - bool BreakInserted = false; + bool BreakInserted = Token->introducesBreakBeforeToken(); + // Store whether we inserted a new line break at the end of the previous + // logical line. + bool NewBreakBefore = false; // We use a conservative reflowing strategy. Reflow starts after a line is // broken or the corresponding whitespace compressed. Reflow ends as soon as a // line that doesn't get reflown with the previous line is reached. @@ -1496,23 +1502,47 @@ unsigned Penalty = 0; unsigned RemainingTokenColumns = 0; unsigned TailOffset = 0; + DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn + << ".\n"); for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); LineIndex != EndIndex; ++LineIndex) { + DEBUG(llvm::dbgs() << " Line: " << LineIndex + << " (Reflow: " << ReflowInProgress << ")\n"); BreakableToken::Split SplitBefore(StringRef::npos, 0); if (ReflowInProgress) { SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns, RemainingSpace, CommentPragmasRegex); } ReflowInProgress = SplitBefore.first != StringRef::npos; + DEBUG(if (ReflowInProgress) llvm::dbgs() << " Reflowing.\n"); TailOffset = ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0; - BreakInserted = BreakInserted || Token->introducesBreakBefore(LineIndex); + // If we found a reflow split and have added a new break before this line, + // we are going to remove the line break at the start of the next logical + // line. + // For example, here we'll add a new line break after 'text', and + // subsequently delete the line break between 'that' and 'reflows'. + // // some text that + // // reflows + // -> + // // some text + // // that reflows + // When adding the line break, we also added the penalty for it, so we need + // to subtract that penalty again when we remove the line break due to + // reflowing. + if (ReflowInProgress && NewBreakBefore) { + assert(Penalty >= NewBreakPenalty); + Penalty -= NewBreakPenalty; + } + NewBreakBefore = false; if (!DryRun) Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, RemainingSpace, SplitBefore, Whitespaces); RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore); while (RemainingTokenColumns > RemainingSpace) { + DEBUG(llvm::dbgs() << " Over limit, need: " << RemainingTokenColumns + << ", space: " << RemainingSpace << "\n"); BreakableToken::Split Split = Token->getSplit( LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex); if (Split.first == StringRef::npos) { @@ -1520,6 +1550,7 @@ if (LineIndex < EndIndex - 1) Penalty += Style.PenaltyExcessCharacter * (RemainingTokenColumns - RemainingSpace); + DEBUG(llvm::dbgs() << " No break opportunity.\n"); break; } assert(Split.first != 0); @@ -1534,6 +1565,37 @@ ReflowInProgress = true; if (!DryRun) Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); + DEBUG(llvm::dbgs() << " Compressing below limit.\n"); + break; + } + + // Compute both the penalties for: + // - not breaking, and leaving excess characters + // - adding a new line break + assert(RemainingTokenColumnsAfterCompression > RemainingSpace); + unsigned ExcessCharactersPenalty = + (RemainingTokenColumnsAfterCompression - RemainingSpace) * + Style.PenaltyExcessCharacter; + + unsigned BreakPenalty = NewBreakPenalty; + unsigned ColumnsUsed = + Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); + if (ColumnsUsed > ColumnLimit) + BreakPenalty += + Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit); + + DEBUG(llvm::dbgs() << " Penalty excess: " << ExcessCharactersPenalty + << "\n break : " << BreakPenalty << "\n"); + // Only continue to add the line break if the penalty of the excess + // characters is larger than the penalty of the line break. + // FIXME: This does not take into account when we can later remove the + // line break again due to a reflow. + if (ExcessCharactersPenalty < BreakPenalty) { + if (!DryRun) + Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); + // Do not set ReflowInProgress: we do not have any space left to + // reflow into. + Penalty += ExcessCharactersPenalty; break; } @@ -1544,27 +1606,27 @@ // but will still be expanded to the next tab stop, so we don't save any // columns. if (NewRemainingTokenColumns == RemainingTokenColumns) + // FIXME: Do we need to adjust the penalty? break; - assert(NewRemainingTokenColumns < RemainingTokenColumns); + + if (!DryRun) Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); - Penalty += Current.SplitPenalty; - unsigned ColumnsUsed = - Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); - if (ColumnsUsed > ColumnLimit) { - Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit); - } + + Penalty += BreakPenalty; TailOffset += Split.first + Split.second; RemainingTokenColumns = NewRemainingTokenColumns; ReflowInProgress = true; BreakInserted = true; + NewBreakBefore = true; } } BreakableToken::Split SplitAfterLastLine = Token->getSplitAfterLastLine(TailOffset, ColumnLimit); if (SplitAfterLastLine.first != StringRef::npos) { + DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n"); if (!DryRun) Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine, Whitespaces); @@ -1586,9 +1648,6 @@ if (Current.is(TT_BlockComment)) State.NoContinuation = true; - Penalty += Current.isStringLiteral() ? Style.PenaltyBreakString - : Style.PenaltyBreakComment; - State.Stack.back().LastSpace = StartColumn; } Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -8002,9 +8002,9 @@ " \"f\");", format("someFunction1234567890(\"aaabbbcccdddeeefff\");", getLLVMStyleWithColumns(24))); - EXPECT_EQ("someFunction(\"aaabbbcc \"\n" - " \"ddde \"\n" - " \"efff\");", + EXPECT_EQ("someFunction(\n" + " \"aaabbbcc ddde \"\n" + " \"efff\");", format("someFunction(\"aaabbbcc ddde efff\");", getLLVMStyleWithColumns(25))); EXPECT_EQ("someFunction(\"aaabbbccc \"\n" @@ -8023,10 +8023,9 @@ " int i;", format("#define A string s = \"1234567890\"; int i;", getLLVMStyleWithColumns(20))); - // FIXME: Put additional penalties on breaking at non-whitespace locations. - EXPECT_EQ("someFunction(\"aaabbbcc \"\n" - " \"dddeeeff\"\n" - " \"f\");", + EXPECT_EQ("someFunction(\n" + " \"aaabbbcc \"\n" + " \"dddeeefff\");", format("someFunction(\"aaabbbcc dddeeefff\");", getLLVMStyleWithColumns(25))); } @@ -9895,6 +9894,106 @@ EXPECT_EQ("#pragma option -C -A", format("#pragma option -C -A")); } +TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) { + FormatStyle Style = getLLVMStyle(); + Style.ColumnLimit = 20; + + verifyFormat("int a; // the\n" + " // comment", Style); + EXPECT_EQ("int a; /* first line\n" + " * second\n" + " * line third\n" + " * line\n" + " */", + format("int a; /* first line\n" + " * second\n" + " * line third\n" + " * line\n" + " */", + Style)); + EXPECT_EQ("int a; // first line\n" + " // second\n" + " // line third\n" + " // line", + format("int a; // first line\n" + " // second line\n" + " // third line", + Style)); + + Style.PenaltyExcessCharacter = 90; + verifyFormat("int a; // the comment", Style); + EXPECT_EQ("int a; // the\n" + " // comment aa", + format("int a; // the comment aa", Style)); + EXPECT_EQ("int a; /* first line\n" + " * second line\n" + " * third line\n" + " */", + format("int a; /* first line\n" + " * second line\n" + " * third line\n" + " */", + Style)); + EXPECT_EQ("int a; // first line\n" + " // second line\n" + " // third line", + format("int a; // first line\n" + " // second line\n" + " // third line", + Style)); + EXPECT_EQ("int a; /* first line\n" + " * second\n" + " * line third\n" + " * line\n" + " */", + format("int a; /* first line second line third line" + "\n*/", + Style)); + + EXPECT_EQ("// foo bar baz bazfoo\n" + "// foo bar\n", + format("// foo bar baz bazfoo\n" + "// foo bar\n", + Style)); + EXPECT_EQ("// foo bar baz bazfoo\n" + "// foo bar\n", + format("// foo bar baz bazfoo\n" + "// foo bar\n", + Style)); + + // FIXME: Optimally, we'd keep bazfoo on the first line and reflow bar to the + // next one. + EXPECT_EQ("// foo bar baz\n" + "// bazfoo bar foo\n" + "// bar\n", + format("// foo bar baz bazfoo bar\n" + "// foo bar\n", + Style)); + + EXPECT_EQ("// foo bar baz bazfoo\n" + "// foo bar baz\n" + "// bazfoo bar foo\n" + "// bar\n", + format("// foo bar baz bazfoo\n" + "// foo bar baz bazfoo bar\n" + "// foo bar\n", + Style)); + + // FIXME: Optimally, we'd keep 'bar' in the last line at the end of the line, + // as it does not actually protrude far enough to make breaking pay off. + // Unfortunately, due to how reflowing is currently implemented, we already + // check the column limit after the reflowing decision and extend the reflow + // range, so we will not take whitespace compression into account. + EXPECT_EQ("// foo bar baz bazfoo\n" + "// foo bar baz\n" + "// bazfoo bar foo\n" + "// bar\n", + format("// foo bar baz bazfoo\n" + "// foo bar baz bazfoo bar\n" + "// foo bar\n", + Style)); +} + #define EXPECT_ALL_STYLES_EQUAL(Styles) \ for (size_t i = 1; i < Styles.size(); ++i) \ EXPECT_EQ(Styles[0], Styles[i]) << "Style #" << i << " of " << Styles.size() \ Index: unittests/Format/FormatTestComments.cpp =================================================================== --- unittests/Format/FormatTestComments.cpp +++ unittests/Format/FormatTestComments.cpp @@ -2078,6 +2078,22 @@ " // longsec\n", getLLVMStyleWithColumns(20))); + // Simple case that correctly handles reflow in parameter lists. + EXPECT_EQ("a = f(/* looooooooong\n" + " * long long\n" + " */\n" + " a);", + format("a = f(/* looooooooong long\n* long\n*/ a);", + getLLVMStyleWithColumns(22))); + // Tricky case that has fewer lines if we reflow the comment, ending up with + // fewer lines. + EXPECT_EQ("a = f(/* loooooong\n" + " * long long\n" + " */\n" + " a);", + format("a = f(/* loooooong long\n* long\n*/ a);", + getLLVMStyleWithColumns(22))); + // Keep empty comment lines. EXPECT_EQ("/**/", format(" /**/", getLLVMStyleWithColumns(20))); EXPECT_EQ("/* */", format(" /* */", getLLVMStyleWithColumns(20))); @@ -2426,10 +2442,9 @@ TEST_F(FormatTestComments, BreaksAfterMultilineBlockCommentsInParamLists) { EXPECT_EQ("a = f(/* long\n" - " long\n" - " */\n" + " long */\n" " a);", - format("a = f(/* long long */ a);", getLLVMStyleWithColumns(15))); + format("a = f(/* long long */ a);", getLLVMStyleWithColumns(16))); EXPECT_EQ("a = f(/* long\n" " long\n" @@ -2438,7 +2453,7 @@ format("a = f(/* long\n" " long\n" " */a);", - getLLVMStyleWithColumns(15))); + getLLVMStyleWithColumns(16))); EXPECT_EQ("a = f(/* long\n" " long\n" @@ -2447,7 +2462,7 @@ format("a = f(/* long\n" " long\n" " */ a);", - getLLVMStyleWithColumns(15))); + getLLVMStyleWithColumns(16))); EXPECT_EQ("a = f(/* long\n" " long\n" @@ -2456,23 +2471,21 @@ format("a = f(/* long\n" " long\n" " */ (1 + 1));", - getLLVMStyleWithColumns(15))); + getLLVMStyleWithColumns(16))); EXPECT_EQ( "a = f(a,\n" " /* long\n" - " long\n" - " */\n" + " long */\n" " b);", - format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(15))); + format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16))); EXPECT_EQ( "a = f(a,\n" " /* long\n" - " long\n" - " */\n" + " long */\n" " (1 + 1));", - format("a = f(a, /* long long */ (1 + 1));", getLLVMStyleWithColumns(15))); + format("a = f(a, /* long long */ (1 + 1));", getLLVMStyleWithColumns(16))); } TEST_F(FormatTestComments, IndentLineCommentsInStartOfBlockAtEndOfFile) {