Index: lib/Format/BreakableToken.cpp =================================================================== --- lib/Format/BreakableToken.cpp +++ lib/Format/BreakableToken.cpp @@ -742,6 +742,7 @@ Prefix.resize(Lines.size()); OriginalPrefix.resize(Lines.size()); for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) { + Lines[i] = Lines[i].ltrim(Blanks); // We need to trim the blanks in case this is not the first line in a // multiline comment. Then the indent is included in Lines[i]. StringRef IndentPrefix = Index: lib/Format/ContinuationIndenter.h =================================================================== --- lib/Format/ContinuationIndenter.h +++ lib/Format/ContinuationIndenter.h @@ -29,6 +29,7 @@ namespace format { class AnnotatedLine; +class BreakableToken; struct FormatToken; struct LineState; struct ParenState; @@ -105,11 +106,20 @@ /// /// \returns An extra penalty induced by reformatting the token. unsigned reformatRawStringLiteral(const FormatToken &Current, - unsigned StartColumn, LineState &State, - StringRef Delimiter, + LineState &State, const FormatStyle &RawStringStyle, bool DryRun); + /// \brief If the current token is at the end of the current line, handle + /// the transition to the next line. + unsigned handleEndOfLine(const FormatToken &Current, LineState &State, + bool DryRun, bool AllowBreak); + + /// \brief If \p Current is a raw string that is configured to be reformatted, + /// return the style to be used. + llvm::Optional getRawStringStyle(const FormatToken &Current, + const LineState &State); + /// \brief If the current token sticks out over the end of the line, break /// it if possible. /// @@ -120,7 +130,13 @@ /// penalty for the column limit violation in the last line (and in single /// line tokens) is handled in \c addNextStateToQueue. unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, - bool DryRun); + bool AllowBreak, bool DryRun); + + /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr + /// if the current token cannot be broken. + std::unique_ptr + createBreakableToken(const FormatToken &Current, LineState &State, + bool AllowBreak); /// \brief Appends the next token to \p State and updates information /// necessary for indentation. Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -1008,8 +1008,8 @@ moveStatePastFakeLParens(State, Newline); moveStatePastScopeCloser(State); - bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak && - !State.Stack.back().NoLineBreakInOperand; + bool AllowBreak = !State.Stack.back().NoLineBreak && + !State.Stack.back().NoLineBreakInOperand; moveStatePastScopeOpener(State, Newline); moveStatePastFakeRParens(State); @@ -1023,13 +1023,9 @@ State.Column += Current.ColumnWidth; State.NextToken = State.NextToken->Next; - unsigned Penalty = 0; - if (CanBreakProtrudingToken) - Penalty = breakProtrudingToken(Current, State, DryRun); - if (State.Column > getColumnLimit(State)) { - unsigned ExcessCharacters = State.Column - getColumnLimit(State); - Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; - } + + unsigned Penalty = + handleEndOfLine(Current, State, DryRun, AllowBreak); if (Current.Role) Current.Role->formatFromToken(State, this, DryRun); @@ -1282,8 +1278,10 @@ } unsigned ContinuationIndenter::reformatRawStringLiteral( - const FormatToken &Current, unsigned StartColumn, LineState &State, - StringRef Delimiter, const FormatStyle &RawStringStyle, bool DryRun) { + const FormatToken &Current, LineState &State, + const FormatStyle &RawStringStyle, bool DryRun) { + unsigned StartColumn = State.Column - Current.ColumnWidth; + auto Delimiter = *getRawStringDelimiter(Current.TokenText); // The text of a raw string is between the leading 'R"delimiter(' and the // trailing 'delimiter)"'. unsigned PrefixSize = 3 + Delimiter.size(); @@ -1353,9 +1351,6 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, LineState &State) { - if (!Current.IsMultiline) - return 0; - // Break before further function parameters on all levels. for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) State.Stack[i].BreakBeforeParameter = true; @@ -1370,42 +1365,56 @@ return 0; } -unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, - LineState &State, - bool DryRun) { +unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current, + LineState &State, bool DryRun, + bool AllowBreak) { + unsigned Penalty = 0; // Compute the raw string style to use in case this is a raw string literal // that can be reformatted. - llvm::Optional Delimiter = None; - llvm::Optional RawStringStyle = None; - if (Current.isStringLiteral()) - Delimiter = getRawStringDelimiter(Current.TokenText); - if (Delimiter) - RawStringStyle = RawStringFormats.get(*Delimiter); - - // Don't break multi-line tokens other than block comments and raw string - // literals. Instead, just update the state. - if (Current.isNot(TT_BlockComment) && !RawStringStyle && Current.IsMultiline) - return addMultilineToken(Current, State); - - // Don't break implicit string literals or import statements. - if (Current.is(TT_ImplicitStringLiteral) || - State.Line->Type == LT_ImportStatement) - return 0; + auto RawStringStyle = getRawStringStyle(Current, State); + if (RawStringStyle) { + Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun); + } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) { + // Don't break multi-line tokens other than block comments and raw string + // literals. Instead, just update the state. + Penalty = addMultilineToken(Current, State); + } else if (State.Line->Type != LT_ImportStatement) { + // We generally don't break import statements. + Penalty = breakProtrudingToken(Current, State, AllowBreak, DryRun); + } + if (State.Column > getColumnLimit(State)) { + unsigned ExcessCharacters = State.Column - getColumnLimit(State); + Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; + } + return Penalty; +} - if (!Current.isStringLiteral() && !Current.is(tok::comment)) - return 0; +llvm::Optional +ContinuationIndenter::getRawStringStyle(const FormatToken &Current, + const LineState &State) { + if (!Current.isStringLiteral()) + return None; + auto Delimiter = getRawStringDelimiter(Current.TokenText); + if (!Delimiter) + return None; + auto RawStringStyle = RawStringFormats.get(*Delimiter); + if (!RawStringStyle) + return None; + RawStringStyle->ColumnLimit = getColumnLimit(State); + return RawStringStyle; +} - std::unique_ptr Token; +std::unique_ptr ContinuationIndenter::createBreakableToken( + const FormatToken &Current, LineState &State, bool AllowBreak) { unsigned StartColumn = State.Column - Current.ColumnWidth; - unsigned ColumnLimit = getColumnLimit(State); - if (Current.isStringLiteral()) { // FIXME: String literal breaking is currently disabled for Java and JS, as // it requires strings to be merged using "+" which we don't support. if (Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript || - !Style.BreakStringLiterals) - return 0; + !Style.BreakStringLiterals || + !AllowBreak) + return nullptr; // Don't break string literals inside preprocessor directives (except for // #define directives, as their contents are stored in separate lines and @@ -1413,17 +1422,12 @@ // This way we avoid breaking code with line directives and unknown // preprocessor directives that contain long string literals. if (State.Line->Type == LT_PreprocessorDirective) - return 0; + return nullptr; // Exempts unterminated string literals from line breaking. The user will // likely want to terminate the string before any line breaking is done. if (Current.IsUnterminatedLiteral) - return 0; + return nullptr; - if (RawStringStyle) { - RawStringStyle->ColumnLimit = ColumnLimit; - return reformatRawStringLiteral(Current, StartColumn, State, *Delimiter, - *RawStringStyle, DryRun); - } StringRef Text = Current.TokenText; StringRef Prefix; StringRef Postfix; @@ -1437,36 +1441,48 @@ Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { - Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix, - Postfix, State.Line->InPPDirective, - Encoding, Style)); - } else { - return 0; + return llvm::make_unique( + Current, StartColumn, Prefix, Postfix, State.Line->InPPDirective, + Encoding, Style); } } else if (Current.is(TT_BlockComment)) { if (!Style.ReflowComments || // If a comment token switches formatting, like // /* clang-format on */, we don't want to break it further, // but we may still want to adjust its indentation. - switchesFormatting(Current)) - return addMultilineToken(Current, State); - Token.reset(new BreakableBlockComment( + switchesFormatting(Current)) { + return nullptr; + } + return llvm::make_unique( Current, StartColumn, Current.OriginalColumn, !Current.Previous, - State.Line->InPPDirective, Encoding, Style)); + State.Line->InPPDirective, Encoding, Style); } else if (Current.is(TT_LineComment) && (Current.Previous == nullptr || Current.Previous->isNot(TT_ImplicitStringLiteral))) { if (!Style.ReflowComments || CommentPragmasRegex.match(Current.TokenText.substr(2)) || switchesFormatting(Current)) - return 0; - Token.reset(new BreakableLineCommentSection( + return nullptr; + return llvm::make_unique( Current, StartColumn, Current.OriginalColumn, !Current.Previous, - /*InPPDirective=*/false, Encoding, Style)); + /*InPPDirective=*/false, Encoding, Style); + } + return nullptr; +} + +unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, + LineState &State, + bool AllowBreak, + bool DryRun) { + std::unique_ptr Token = + createBreakableToken(Current, State, AllowBreak); + if (!Token) + return 0; + unsigned ColumnLimit = getColumnLimit(State); + unsigned StartColumn = State.Column - Current.ColumnWidth; + if (Current.is(TT_LineComment)) { // We don't insert backslashes when breaking line comments. ColumnLimit = Style.ColumnLimit; - } else { - return 0; } if (Current.UnbreakableTailLength >= ColumnLimit) return 0; Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -6319,7 +6319,8 @@ "#include_next " "#include \"abc.h\" // this is included for ABC\n" "#include \"some long include\" // with a comment\n" - "#include \"some very long include paaaaaaaaaaaaaaaaaaaaaaath\"", + "#include \"some very long include path\"\n" + "#include \n", getLLVMStyleWithColumns(35)); EXPECT_EQ("#include \"a.h\"", format("#include \"a.h\"")); EXPECT_EQ("#include ", format("#include")); Index: unittests/Format/FormatTestComments.cpp =================================================================== --- unittests/Format/FormatTestComments.cpp +++ unittests/Format/FormatTestComments.cpp @@ -680,8 +680,7 @@ EXPECT_EQ("{\n" " //\n" " //\\\n" - " // long 1 2 3 4\n" - " // 5\n" + " // long 1 2 3 4 5\n" "}", format("{\n" " //\n"