Index: lib/Format/ContinuationIndenter.h =================================================================== --- lib/Format/ContinuationIndenter.h +++ lib/Format/ContinuationIndenter.h @@ -27,6 +27,7 @@ namespace format { class AnnotatedLine; +class BreakableToken; struct FormatToken; struct LineState; struct ParenState; @@ -100,6 +101,11 @@ unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun); + unsigned reflowProtrudingToken(const FormatToken & Current, LineState & State, + std::unique_ptr & Token, + unsigned ColumnLimit, bool DryRun); + + /// \brief Appends the next token to \p State and updates information /// necessary for indentation. /// @@ -350,6 +356,8 @@ /// \brief The indent of the first token. unsigned FirstIndent; + bool Reflow = true; + /// \brief The line that is being formatted. /// /// Does not need to be considered for memoization because it doesn't change. Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -1219,6 +1219,112 @@ return 0; } +unsigned ContinuationIndenter::reflowProtrudingToken(const FormatToken &Current, LineState &State, + std::unique_ptr &Token, + unsigned ColumnLimit, bool DryRun) { + unsigned StartColumn = State.Column - Current.ColumnWidth; + + unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; + bool BreakInserted = false; + // We use a conservative reflowing strategy. Reflow starts after a line is + // broken or the corresponding whitespace compressed. Reflow ends as soon as a + // line that doesn't get reflown with the previous line is reached. + bool ReflowInProgress = false; + unsigned Penalty = 0; + unsigned RemainingTokenColumns = 0; + for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); + LineIndex != EndIndex; ++LineIndex) { + BreakableToken::Split SplitBefore(StringRef::npos, 0); + if (ReflowInProgress) { + SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns, + RemainingSpace, CommentPragmasRegex); + } + ReflowInProgress = SplitBefore.first != StringRef::npos; + unsigned TailOffset = + ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0; + if (!DryRun) + Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, + RemainingSpace, SplitBefore, Whitespaces); + RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( + LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore); + if (!State.Reflow) { + if (RemainingTokenColumns > RemainingSpace) + Penalty += Style.PenaltyExcessCharacter * + (RemainingTokenColumns - RemainingSpace); + continue; + } + while (RemainingTokenColumns > RemainingSpace) { + BreakableToken::Split Split = Token->getSplit( + LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex); + if (Split.first == StringRef::npos) { + Penalty += Style.PenaltyExcessCharacter * + (RemainingTokenColumns - RemainingSpace); + break; + } + assert(Split.first != 0); + + // Check if compressing the whitespace range will bring the line length + // under the limit. If that is the case, we perform whitespace compression + // instead of inserting a line break. + unsigned RemainingTokenColumnsAfterCompression = + Token->getLineLengthAfterCompression(RemainingTokenColumns, Split); + if (RemainingTokenColumnsAfterCompression <= RemainingSpace) { + RemainingTokenColumns = RemainingTokenColumnsAfterCompression; + ReflowInProgress = true; + if (!DryRun) + Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); + break; + } + + unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( + LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); + + // When breaking before a tab character, it may be moved by a few columns, + // but will still be expanded to the next tab stop, so we don't save any + // columns. + if (NewRemainingTokenColumns == RemainingTokenColumns) + break; + + assert(NewRemainingTokenColumns < RemainingTokenColumns); + if (!DryRun) + Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); + Penalty += Current.SplitPenalty; + unsigned ColumnsUsed = + Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); + if (ColumnsUsed > ColumnLimit) { + Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit); + } + TailOffset += Split.first + Split.second; + RemainingTokenColumns = NewRemainingTokenColumns; + ReflowInProgress = true; + BreakInserted = true; + } + } + + State.Column = RemainingTokenColumns; + + if (BreakInserted) { + assert(State.Reflow); + + // If we break the token inside a parameter list, we need to break before + // the next parameter on all levels, so that the next parameter is clearly + // visible. Line comments already introduce a break. + if (Current.isNot(TT_LineComment)) { + for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) + State.Stack[i].BreakBeforeParameter = true; + } + + Penalty += Current.isStringLiteral() ? Style.PenaltyBreakString + : Style.PenaltyBreakComment; + + State.Stack.back().LastSpace = StartColumn; + } + + Token->updateNextToken(State); + + return Penalty; +} + unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun) { @@ -1306,97 +1412,30 @@ if (Current.UnbreakableTailLength >= ColumnLimit) return 0; - unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; - bool BreakInserted = false; - // We use a conservative reflowing strategy. Reflow starts after a line is - // broken or the corresponding whitespace compressed. Reflow ends as soon as a - // line that doesn't get reflown with the previous line is reached. - bool ReflowInProgress = false; unsigned Penalty = 0; - unsigned RemainingTokenColumns = 0; - for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); - LineIndex != EndIndex; ++LineIndex) { - BreakableToken::Split SplitBefore(StringRef::npos, 0); - if (ReflowInProgress) { - SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns, - RemainingSpace, CommentPragmasRegex); - } - ReflowInProgress = SplitBefore.first != StringRef::npos; - unsigned TailOffset = - ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0; - if (!DryRun) - Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, - RemainingSpace, SplitBefore, Whitespaces); - RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( - LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore); - while (RemainingTokenColumns > RemainingSpace) { - BreakableToken::Split Split = Token->getSplit( - LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex); - if (Split.first == StringRef::npos) { - // The last line's penalty is handled in addNextStateToQueue(). - if (LineIndex < EndIndex - 1) - Penalty += Style.PenaltyExcessCharacter * - (RemainingTokenColumns - RemainingSpace); - break; - } - assert(Split.first != 0); - - // Check if compressing the whitespace range will bring the line length - // under the limit. If that is the case, we perform whitespace compression - // instead of inserting a line break. - unsigned RemainingTokenColumnsAfterCompression = - Token->getLineLengthAfterCompression(RemainingTokenColumns, Split); - if (RemainingTokenColumnsAfterCompression <= RemainingSpace) { - RemainingTokenColumns = RemainingTokenColumnsAfterCompression; - ReflowInProgress = true; - if (!DryRun) - Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); - break; - } - - unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( - LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); - - // When breaking before a tab character, it may be moved by a few columns, - // but will still be expanded to the next tab stop, so we don't save any - // columns. - if (NewRemainingTokenColumns == RemainingTokenColumns) - break; + if (!DryRun) { + Penalty = reflowProtrudingToken(Current, State, Token, ColumnLimit, DryRun); + } else { + LineState NoreflowState = State; + NoreflowState.Reflow = false; + unsigned NoreflowPenalty = reflowProtrudingToken(Current, NoreflowState, Token, ColumnLimit, DryRun); - assert(NewRemainingTokenColumns < RemainingTokenColumns); - if (!DryRun) - Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); - Penalty += Current.SplitPenalty; - unsigned ColumnsUsed = - Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); - if (ColumnsUsed > ColumnLimit) { - Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit); - } - TailOffset += Split.first + Split.second; - RemainingTokenColumns = NewRemainingTokenColumns; - ReflowInProgress = true; - BreakInserted = true; + if (NoreflowPenalty > 0) { + State.Reflow = true; + Penalty = reflowProtrudingToken(Current, State, Token, ColumnLimit, DryRun); } - } - - State.Column = RemainingTokenColumns; - if (BreakInserted) { - // If we break the token inside a parameter list, we need to break before - // the next parameter on all levels, so that the next parameter is clearly - // visible. Line comments already introduce a break. - if (Current.isNot(TT_LineComment)) { - for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) - State.Stack[i].BreakBeforeParameter = true; + if (NoreflowPenalty <= Penalty) { + State = NoreflowState; + Penalty = NoreflowPenalty; } - - Penalty += Current.isStringLiteral() ? Style.PenaltyBreakString - : Style.PenaltyBreakComment; - - State.Stack.back().LastSpace = StartColumn; } - Token->updateNextToken(State); + // Do not count the penalty twice, it will be added afterwards + if (State.Column > getColumnLimit(State)) { + unsigned ExcessCharacters = State.Column - getColumnLimit(State); + Penalty -= Style.PenaltyExcessCharacter * ExcessCharacters; + } return Penalty; } Index: lib/Format/UnwrappedLineFormatter.cpp =================================================================== --- lib/Format/UnwrappedLineFormatter.cpp +++ lib/Format/UnwrappedLineFormatter.cpp @@ -866,6 +866,7 @@ for (std::deque::iterator I = Path.begin(), E = Path.end(); I != E; ++I) { unsigned Penalty = 0; + State.Reflow = (*I)->State.Reflow; formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -9393,6 +9393,60 @@ EXPECT_EQ("#pragma option -C -A", format("#pragma option -C -A")); } +TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) { + FormatStyle Style = getLLVMStyle(); + Style.ColumnLimit = 20; + + verifyFormat("int a; // the\n" + " // comment", Style); + EXPECT_EQ("int a; /* first line\n" + " * second\n" + " * line third\n" + " * line\n" + " */", + format("int a; /* first line\n" + " * second\n" + " * line third\n" + " * line\n" + " */", Style)); + EXPECT_EQ("int a; // first line\n" + " // second\n" + " // line third\n" + " // line", + format("int a; // first line\n" + " // second line\n" + " // third line", + Style)); + + Style.PenaltyExcessCharacter = 90; + verifyFormat("int a; // the comment", Style); + EXPECT_EQ("int a; // the\n" + " // comment aa", + format("int a; // the comment aa", Style)); + EXPECT_EQ("int a; /* first line\n" + " * second line\n" + " * third line\n" + " */", + format("int a; /* first line\n" + " * second line\n" + " * third line\n" + " */", Style)); + EXPECT_EQ("int a; // first line\n" + " // second line\n" + " // third line", + format("int a; // first line\n" + " // second line\n" + " // third line", + Style)); + EXPECT_EQ("int a; /* first line\n" + " * second\n" + " * line third\n" + " * line\n" + " */", + format("int a; /* first line second line third line" + "\n*/", Style)); +} + #define EXPECT_ALL_STYLES_EQUAL(Styles) \ for (size_t i = 1; i < Styles.size(); ++i) \ EXPECT_EQ(Styles[0], Styles[i]) << "Style #" << i << " of " << Styles.size() \