Index: lib/Format/ContinuationIndenter.h =================================================================== --- lib/Format/ContinuationIndenter.h +++ lib/Format/ContinuationIndenter.h @@ -27,6 +27,7 @@ namespace format { class AnnotatedLine; +class BreakableToken; struct FormatToken; struct LineState; struct ParenState; @@ -100,6 +101,11 @@ unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun); + unsigned reflowProtrudingToken(const FormatToken & Current, LineState & State, + std::unique_ptr & Token, + unsigned ColumnLimit, bool DryRun); + + /// \brief Appends the next token to \p State and updates information /// necessary for indentation. /// @@ -350,6 +356,8 @@ /// \brief The indent of the first token. unsigned FirstIndent; + bool Reflow = true; + /// \brief The line that is being formatted. /// /// Does not need to be considered for memoization because it doesn't change. Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -1219,92 +1219,10 @@ return 0; } -unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, - LineState &State, - bool DryRun) { - // Don't break multi-line tokens other than block comments. Instead, just - // update the state. - if (Current.isNot(TT_BlockComment) && Current.IsMultiline) - return addMultilineToken(Current, State); - - // Don't break implicit string literals or import statements. - if (Current.is(TT_ImplicitStringLiteral) || - State.Line->Type == LT_ImportStatement) - return 0; - - if (!Current.isStringLiteral() && !Current.is(tok::comment)) - return 0; - - std::unique_ptr Token; +unsigned ContinuationIndenter::reflowProtrudingToken(const FormatToken &Current, LineState &State, + std::unique_ptr &Token, + unsigned ColumnLimit, bool DryRun) { unsigned StartColumn = State.Column - Current.ColumnWidth; - unsigned ColumnLimit = getColumnLimit(State); - - if (Current.isStringLiteral()) { - // FIXME: String literal breaking is currently disabled for Java and JS, as - // it requires strings to be merged using "+" which we don't support. - if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript || - !Style.BreakStringLiterals) - return 0; - - // Don't break string literals inside preprocessor directives (except for - // #define directives, as their contents are stored in separate lines and - // are not affected by this check). - // This way we avoid breaking code with line directives and unknown - // preprocessor directives that contain long string literals. - if (State.Line->Type == LT_PreprocessorDirective) - return 0; - // Exempts unterminated string literals from line breaking. The user will - // likely want to terminate the string before any line breaking is done. - if (Current.IsUnterminatedLiteral) - return 0; - - StringRef Text = Current.TokenText; - StringRef Prefix; - StringRef Postfix; - // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. - // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to - // reduce the overhead) for each FormatToken, which is a string, so that we - // don't run multiple checks here on the hot path. - if ((Text.endswith(Postfix = "\"") && - (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") || - Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") || - Text.startswith(Prefix = "u8\"") || - Text.startswith(Prefix = "L\""))) || - (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { - Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix, - Postfix, State.Line->InPPDirective, - Encoding, Style)); - } else { - return 0; - } - } else if (Current.is(TT_BlockComment)) { - if (!Current.isTrailingComment() || !Style.ReflowComments || - // If a comment token switches formatting, like - // /* clang-format on */, we don't want to break it further, - // but we may still want to adjust its indentation. - switchesFormatting(Current)) - return addMultilineToken(Current, State); - Token.reset(new BreakableBlockComment( - Current, StartColumn, Current.OriginalColumn, !Current.Previous, - State.Line->InPPDirective, Encoding, Style)); - } else if (Current.is(TT_LineComment) && - (Current.Previous == nullptr || - Current.Previous->isNot(TT_ImplicitStringLiteral))) { - if (!Style.ReflowComments || - CommentPragmasRegex.match(Current.TokenText.substr(2)) || - switchesFormatting(Current)) - return 0; - Token.reset(new BreakableLineCommentSection( - Current, StartColumn, Current.OriginalColumn, !Current.Previous, - /*InPPDirective=*/false, Encoding, Style)); - // We don't insert backslashes when breaking line comments. - ColumnLimit = Style.ColumnLimit; - } else { - return 0; - } - if (Current.UnbreakableTailLength >= ColumnLimit) - return 0; unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; bool BreakInserted = false; @@ -1330,14 +1248,18 @@ RemainingSpace, SplitBefore, Whitespaces); RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore); + if (!State.Reflow) { + if (RemainingTokenColumns > RemainingSpace) + Penalty += Style.PenaltyExcessCharacter * + (RemainingTokenColumns - RemainingSpace); + continue; + } while (RemainingTokenColumns > RemainingSpace) { BreakableToken::Split Split = Token->getSplit( LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex); if (Split.first == StringRef::npos) { - // The last line's penalty is handled in addNextStateToQueue(). - if (LineIndex < EndIndex - 1) - Penalty += Style.PenaltyExcessCharacter * - (RemainingTokenColumns - RemainingSpace); + Penalty += Style.PenaltyExcessCharacter * + (RemainingTokenColumns - RemainingSpace); break; } assert(Split.first != 0); @@ -1393,6 +1315,8 @@ State.Column = RemainingTokenColumns; if (BreakInserted) { + assert(State.Reflow); + // If we break the token inside a parameter list, we need to break before // the next parameter on all levels, so that the next parameter is clearly // visible. Line comments already introduce a break. @@ -1412,6 +1336,121 @@ return Penalty; } +unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, + LineState &State, + bool DryRun) { + // Don't break multi-line tokens other than block comments. Instead, just + // update the state. + if (Current.isNot(TT_BlockComment) && Current.IsMultiline) + return addMultilineToken(Current, State); + + // Don't break implicit string literals or import statements. + if (Current.is(TT_ImplicitStringLiteral) || + State.Line->Type == LT_ImportStatement) + return 0; + + if (!Current.isStringLiteral() && !Current.is(tok::comment)) + return 0; + + std::unique_ptr Token; + unsigned StartColumn = State.Column - Current.ColumnWidth; + unsigned ColumnLimit = getColumnLimit(State); + + if (Current.isStringLiteral()) { + // FIXME: String literal breaking is currently disabled for Java and JS, as + // it requires strings to be merged using "+" which we don't support. + if (Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_JavaScript || + !Style.BreakStringLiterals) + return 0; + + // Don't break string literals inside preprocessor directives (except for + // #define directives, as their contents are stored in separate lines and + // are not affected by this check). + // This way we avoid breaking code with line directives and unknown + // preprocessor directives that contain long string literals. + if (State.Line->Type == LT_PreprocessorDirective) + return 0; + // Exempts unterminated string literals from line breaking. The user will + // likely want to terminate the string before any line breaking is done. + if (Current.IsUnterminatedLiteral) + return 0; + + StringRef Text = Current.TokenText; + StringRef Prefix; + StringRef Postfix; + // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. + // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to + // reduce the overhead) for each FormatToken, which is a string, so that we + // don't run multiple checks here on the hot path. + if ((Text.endswith(Postfix = "\"") && + (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") || + Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") || + Text.startswith(Prefix = "u8\"") || + Text.startswith(Prefix = "L\""))) || + (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { + Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix, + Postfix, State.Line->InPPDirective, + Encoding, Style)); + } else { + return 0; + } + } else if (Current.is(TT_BlockComment)) { + if (!Current.isTrailingComment() || !Style.ReflowComments || + // If a comment token switches formatting, like + // /* clang-format on */, we don't want to break it further, + // but we may still want to adjust its indentation. + switchesFormatting(Current)) + return addMultilineToken(Current, State); + Token.reset(new BreakableBlockComment( + Current, StartColumn, Current.OriginalColumn, !Current.Previous, + State.Line->InPPDirective, Encoding, Style)); + } else if (Current.is(TT_LineComment) && + (Current.Previous == nullptr || + Current.Previous->isNot(TT_ImplicitStringLiteral))) { + if (!Style.ReflowComments || + CommentPragmasRegex.match(Current.TokenText.substr(2)) || + switchesFormatting(Current)) + return 0; + Token.reset(new BreakableLineCommentSection( + Current, StartColumn, Current.OriginalColumn, !Current.Previous, + /*InPPDirective=*/false, Encoding, Style)); + // We don't insert backslashes when breaking line comments. + ColumnLimit = Style.ColumnLimit; + } else { + return 0; + } + if (Current.UnbreakableTailLength >= ColumnLimit) + return 0; + + unsigned Penalty = 0; + if (!DryRun) { + Penalty = reflowProtrudingToken(Current, State, Token, ColumnLimit, DryRun); + } else { + LineState NoreflowState = State; + NoreflowState.Reflow = false; + unsigned NoreflowPenalty = reflowProtrudingToken(Current, NoreflowState, Token, ColumnLimit, DryRun); + + if (NoreflowPenalty > 0) { + State.Reflow = true; + Penalty = reflowProtrudingToken(Current, State, Token, ColumnLimit, DryRun); + } + + if (NoreflowPenalty <= Penalty) { + State = NoreflowState; + Penalty = NoreflowPenalty; + } + } + + // Do not count the penalty twice, it will be added afterwards + if (State.Column > getColumnLimit(State)) { + unsigned ExcessCharacters = State.Column - getColumnLimit(State); + Penalty -= Style.PenaltyExcessCharacter * ExcessCharacters; + } + + return Penalty; +} + unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const { // In preprocessor directives reserve two chars for trailing " \" return Style.ColumnLimit - (State.Line->InPPDirective ? 2 : 0); Index: lib/Format/UnwrappedLineFormatter.cpp =================================================================== --- lib/Format/UnwrappedLineFormatter.cpp +++ lib/Format/UnwrappedLineFormatter.cpp @@ -885,6 +885,7 @@ for (std::deque::iterator I = Path.begin(), E = Path.end(); I != E; ++I) { unsigned Penalty = 0; + State.Reflow = (*I)->State.Reflow; formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -9475,6 +9475,60 @@ EXPECT_EQ("#pragma option -C -A", format("#pragma option -C -A")); } +TEST_F(FormatTest, OptimizeBreakPenaltyVsExcess) { + FormatStyle Style = getLLVMStyle(); + Style.ColumnLimit = 20; + + verifyFormat("int a; // the\n" + " // comment", Style); + EXPECT_EQ("int a; /* first line\n" + " * second\n" + " * line third\n" + " * line\n" + " */", + format("int a; /* first line\n" + " * second\n" + " * line third\n" + " * line\n" + " */", Style)); + EXPECT_EQ("int a; // first line\n" + " // second\n" + " // line third\n" + " // line", + format("int a; // first line\n" + " // second line\n" + " // third line", + Style)); + + Style.PenaltyExcessCharacter = 90; + verifyFormat("int a; // the comment", Style); + EXPECT_EQ("int a; // the\n" + " // comment aa", + format("int a; // the comment aa", Style)); + EXPECT_EQ("int a; /* first line\n" + " * second line\n" + " * third line\n" + " */", + format("int a; /* first line\n" + " * second line\n" + " * third line\n" + " */", Style)); + EXPECT_EQ("int a; // first line\n" + " // second line\n" + " // third line", + format("int a; // first line\n" + " // second line\n" + " // third line", + Style)); + EXPECT_EQ("int a; /* first line\n" + " * second\n" + " * line third\n" + " * line\n" + " */", + format("int a; /* first line second line third line" + "\n*/", Style)); +} + #define EXPECT_ALL_STYLES_EQUAL(Styles) \ for (size_t i = 1; i < Styles.size(); ++i) \ EXPECT_EQ(Styles[0], Styles[i]) << "Style #" << i << " of " << Styles.size() \