Index: lib/Format/BreakableToken.h =================================================================== --- lib/Format/BreakableToken.h +++ lib/Format/BreakableToken.h @@ -33,19 +33,32 @@ struct FormatStyle; -/// \brief Base class for strategies on how to break tokens. +/// \brief Base class for tokens / ranges of tokens that can allow breaking +/// within the tokens - for example, to avoid whitespace beyond the column +/// limit, or to reflow text. /// -/// This is organised around the concept of a \c Split, which is a whitespace -/// range that signifies a position of the content of a token where a -/// reformatting might be done. Operating with splits is divided into 3 -/// operations: +/// Generally, a breakable token consists of logical lines, addressed by a line +/// index. For example, in a sequence of line comments, each line comment is its +/// own logical line; similarly, for a block comment, each line in the block +/// comment is on its own logical line. +/// +/// There are two methods to compute the layout of the token: +/// - getRangeLength measures the number of columns needed for a range of text +/// within a logical line, and +/// - getContentStartColumn returns the start column at which we want the +/// content of a logical line to start (potentially after introducing a line +/// break). +/// +/// The mechanism to adapt the layout of the breakable token is organised +/// around the concept of a \c Split, which is a whitespace range that signifies +/// a position of the content of a token where a reformatting might be done. +/// +/// Operating with splits is divided into two operations: /// - getSplit, for finding a split starting at a position, -/// - getLineLengthAfterSplit, for calculating the size in columns of the rest -/// of the content after a split has been used for breaking, and /// - insertBreak, for executing the split using a whitespace manager. /// /// There is a pair of operations that are used to compress a long whitespace -/// range with a single space if that will bring the line lenght under the +/// range with a single space if that will bring the line length under the /// column limit: /// - getLineLengthAfterCompression, for calculating the size in columns of the /// line after a whitespace range has been compressed, and @@ -56,14 +69,12 @@ /// For tokens where the whitespace before each line needs to be also /// reformatted, for example for tokens supporting reflow, there are analogous /// operations that might be executed before the main line breaking occurs: -/// - getSplitBefore, for finding a split such that the content preceding it +/// - getReflowSplit, for finding a split such that the content preceding it /// needs to be specially reflown, +/// - reflow, for executing the split using a whitespace manager, /// - introducesBreakBefore, for checking if reformatting the beginning /// of the content introduces a line break before it, -/// - getLineLengthAfterSplitBefore, for calculating the line length in columns -/// of the remainder of the content after the beginning of the content has -/// been reformatted, and -/// - replaceWhitespaceBefore, for executing the reflow using a whitespace +/// - adaptStartOfLine, for executing the reflow using a whitespace /// manager. /// /// For tokens that require the whitespace after the last line to be @@ -72,13 +83,9 @@ /// that might be executed after the last line has been reformatted: /// - getSplitAfterLastLine, for finding a split after the last line that needs /// to be reflown, -/// - getLineLengthAfterSplitAfterLastLine, for calculating the line length in -/// columns of the remainder of the token, and /// - replaceWhitespaceAfterLastLine, for executing the reflow using a /// whitespace manager. /// -/// FIXME: The interface seems set in stone, so we might want to just pull the -/// strategy into the class, instead of controlling it from the outside. class BreakableToken { public: /// \brief Contains starting character index and length of split. @@ -89,26 +96,44 @@ /// \brief Returns the number of lines in this token in the original code. virtual unsigned getLineCount() const = 0; - /// \brief Returns the number of columns required to format the piece of line - /// at \p LineIndex, from byte offset \p TailOffset with length \p Length. + /// \brief Returns the number of columns required to format the text in the + /// byte range [\p Offset, \p Offset \c + \p Length). + /// + /// \p Offset is the byte offset from the start of the content of the line + /// at \p LineIndex. + /// + /// \p Length can be StringRef::npos, which means "to the end of the line", + /// including potentially unbreakable sequences of tokens following after + /// the end of the content of this token. + /// + /// \p StartColumn is the column at which the text starts in the formatted + /// file, needed to compute tab stops correctly. + virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const = 0; + + /// \brief Returns the column at which content in line \p LineIndex starts, + /// assuming no reflow. /// - /// Note that previous breaks are not taken into account. \p TailOffset is - /// always specified from the start of the (original) line. - /// \p Length can be set to StringRef::npos, which means "to the end of line". - virtual unsigned - getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const = 0; + /// If \p Break is true, returns the column at which the line should start + /// after the line break. + /// If \p Break is false, returns the column at which the line itself will + /// start. + virtual unsigned getContentStartColumn(unsigned LineIndex, + bool Break) const = 0; /// \brief Returns a range (offset, length) at which to break the line at /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not - /// violate \p ColumnLimit. + /// violate \p ColumnLimit, assuming the text starting at \p TailOffset in + /// the token is formatted starting at ContentStartColumn in the reformatted + /// file. virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, + unsigned ColumnLimit, unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const = 0; /// \brief Emits the previously retrieved \p Split via \p Whitespaces. virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) = 0; + WhitespaceManager &Whitespaces) const = 0; /// \brief Returns the number of columns required to format the piece of line /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range @@ -120,48 +145,37 @@ /// space. virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) = 0; + WhitespaceManager &Whitespaces) const = 0; - /// \brief Returns a whitespace range (offset, length) of the content at - /// \p LineIndex such that the content preceding this range needs to be - /// reformatted before any breaks are made to this line. + /// \brief Returns a whitespace range (offset, length) of the content at \p + /// LineIndex such that the content of that line is reflown to the end of the + /// previous one. /// - /// \p PreviousEndColumn is the end column of the previous line after - /// formatting. + /// Returning (StringRef::npos, 0) indicates reflowing is not possible. /// - /// A result having offset == StringRef::npos means that no piece of the line - /// needs to be reformatted before any breaks are made. - virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, + /// The range will include any whitespace preceding the specified line's + /// content. + /// + /// If the split is not contained within one token, for example when reflowing + /// line comments, returns (0, ). + virtual Split getReflowSplit(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { return Split(StringRef::npos, 0); } + /// \brief Reflows the current line into the end of the previous one. + virtual void reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const {} + /// \brief Returns whether there will be a line break at the start of the /// token. virtual bool introducesBreakBeforeToken() const { return false; } - /// \brief Returns the number of columns required to format the piece of line - /// at \p LineIndex after the content preceding the whitespace range specified - /// \p SplitBefore has been reformatted, but before any breaks are made to - /// this line. - virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, - unsigned TailOffset, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - Split SplitBefore) const { - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); - } - /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. - /// Performs a reformatting of the content at \p LineIndex preceding the - /// whitespace range \p SplitBefore. - virtual void replaceWhitespaceBefore(unsigned LineIndex, - unsigned PreviousEndColumn, - unsigned ColumnLimit, Split SplitBefore, - WhitespaceManager &Whitespaces) {} + virtual void adaptStartOfLine(unsigned LineIndex, + WhitespaceManager &Whitespaces) const {} /// \brief Returns a whitespace range (offset, length) of the content at /// the last line that needs to be reformatted after the last line has been @@ -169,28 +183,15 @@ /// /// A result having offset == StringRef::npos means that no reformat is /// necessary. - virtual Split getSplitAfterLastLine(unsigned TailOffset, - unsigned ColumnLimit) const { + virtual Split getSplitAfterLastLine(unsigned TailOffset) const { return Split(StringRef::npos, 0); } - /// \brief Returns the number of columns required to format the piece token - /// after the last line after a reformat of the whitespace range \p - /// \p SplitAfterLastLine on the last line has been performed. - virtual unsigned - getLineLengthAfterSplitAfterLastLine(unsigned TailOffset, - Split SplitAfterLastLine) const { - return getLineLengthAfterSplit(getLineCount() - 1, - TailOffset + SplitAfterLastLine.first + - SplitAfterLastLine.second, - StringRef::npos); - } - /// \brief Replaces the whitespace from \p SplitAfterLastLine on the last line /// after the last line has been formatted by performing a reformatting. - virtual void replaceWhitespaceAfterLastLine(unsigned TailOffset, - Split SplitAfterLastLine, - WhitespaceManager &Whitespaces) { + void replaceWhitespaceAfterLastLine(unsigned TailOffset, + Split SplitAfterLastLine, + WhitespaceManager &Whitespaces) const { insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine, Whitespaces); } @@ -212,32 +213,7 @@ const FormatStyle &Style; }; -/// \brief Base class for single line tokens that can be broken. -/// -/// \c getSplit() needs to be implemented by child classes. -class BreakableSingleLineToken : public BreakableToken { -public: - unsigned getLineCount() const override; - unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; - -protected: - BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, - StringRef Prefix, StringRef Postfix, - bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style); - - // The column in which the token starts. - unsigned StartColumn; - // The prefix a line needs after a break in the token. - StringRef Prefix; - // The postfix a line needs before introducing a break. - StringRef Postfix; - // The token text excluding the prefix and postfix. - StringRef Line; -}; - -class BreakableStringLiteral : public BreakableSingleLineToken { +class BreakableStringLiteral : public BreakableToken { public: /// \brief Creates a breakable token for a single line string literal. /// @@ -249,11 +225,30 @@ const FormatStyle &Style); Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ReflowColumn, llvm::Regex &CommentPragmasRegex) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; + WhitespaceManager &Whitespaces) const override; void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override {} + WhitespaceManager &Whitespaces) const override {} + unsigned getLineCount() const override; + unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const override; + unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; + +protected: + // The column in which the token starts. + unsigned StartColumn; + // The prefix a line needs after a break in the token. + StringRef Prefix; + // The postfix a line needs before introducing a break. + StringRef Postfix; + // The token text excluding the prefix and postfix. + StringRef Line; + // Length of the sequence of tokens after this string literal that cannot + // contain line breaks. + unsigned UnbreakableTailLength; }; class BreakableComment : public BreakableToken { @@ -269,19 +264,12 @@ public: unsigned getLineCount() const override; Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ReflowColumn, llvm::Regex &CommentPragmasRegex) const override; void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; + WhitespaceManager &Whitespaces) const override; protected: - virtual unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const = 0; - - // Returns a split that divides Text into a left and right parts, such that - // the left part is suitable for reflowing after PreviousEndColumn. - Split getReflowSplit(StringRef Text, StringRef ReflowPrefix, - unsigned PreviousEndColumn, unsigned ColumnLimit) const; - // Returns the token containing the line at LineIndex. const FormatToken &tokenAt(unsigned LineIndex) const; @@ -340,24 +328,20 @@ bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); - unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; + unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const override; + unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, + WhitespaceManager &Whitespaces) const override; + Split getReflowSplit(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; + void reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; bool introducesBreakBeforeToken() const override; - unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, - unsigned TailOffset, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - Split SplitBefore) const override; - void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, Split SplitBefore, - WhitespaceManager &Whitespaces) override; - Split getSplitAfterLastLine(unsigned TailOffset, - unsigned ColumnLimit) const override; + void adaptStartOfLine(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; + Split getSplitAfterLastLine(unsigned TailOffset) const override; bool mayReflow(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; @@ -373,14 +357,6 @@ // considered part of the text). void adjustWhitespace(unsigned LineIndex, int IndentDelta); - // Computes the end column if the full Content from LineIndex gets reflown - // after PreviousEndColumn. - unsigned getReflownColumn(StringRef Content, unsigned LineIndex, - unsigned PreviousEndColumn) const; - - unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const override; - // The column at which the text of a broken line should start. // Note that an optional decoration would go before that column. // IndentAtLineBreak is a uniform position for all lines in a block comment, @@ -416,29 +392,23 @@ bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); - unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; + unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const override; + unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, + WhitespaceManager &Whitespaces) const override; + Split getReflowSplit(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; - unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, - unsigned TailOffset, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - Split SplitBefore) const override; - void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, Split SplitBefore, - WhitespaceManager &Whitespaces) override; + void reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; + void adaptStartOfLine(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; void updateNextToken(LineState &State) const override; bool mayReflow(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; private: - unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const override; - // OriginalPrefix[i] contains the original prefix of line i, including // trailing whitespace before the start of the content. The indentation // preceding the prefix is not included. Index: lib/Format/BreakableToken.cpp =================================================================== --- lib/Format/BreakableToken.cpp +++ lib/Format/BreakableToken.cpp @@ -67,6 +67,8 @@ unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding) { + DEBUG(llvm::dbgs() << "Comment split: \"" << Text << ", " << ColumnLimit + << "\"\n"); if (ColumnLimit <= ContentStartColumn + 1) return BreakableToken::Split(StringRef::npos, 0); @@ -184,47 +186,47 @@ return RemainingTokenColumns + 1 - Split.second; } -unsigned BreakableSingleLineToken::getLineCount() const { return 1; } +unsigned BreakableStringLiteral::getLineCount() const { return 1; } -unsigned BreakableSingleLineToken::getLineLengthAfterSplit( - unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const { - return StartColumn + Prefix.size() + Postfix.size() + - encoding::columnWidthWithTabs(Line.substr(TailOffset, Length), - StartColumn + Prefix.size(), +unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex, + unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const { + assert((Length == StringRef::npos) && + "Getting the length of a part of the string literal indicates that " + "the code tries to reflow it."); + return UnbreakableTailLength + Postfix.size() + + encoding::columnWidthWithTabs(Line.substr(Offset, Length), StartColumn, Style.TabWidth, Encoding); } -BreakableSingleLineToken::BreakableSingleLineToken( +unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex, + bool Break) const { + return StartColumn + Prefix.size(); +} + +BreakableStringLiteral::BreakableStringLiteral( const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) : BreakableToken(Tok, InPPDirective, Encoding, Style), - StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { + StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix), + UnbreakableTailLength(Tok.UnbreakableTailLength) { assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); Line = Tok.TokenText.substr( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); } -BreakableStringLiteral::BreakableStringLiteral( - const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, - StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style) - : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective, - Encoding, Style) {} - -BreakableToken::Split -BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, - llvm::Regex &CommentPragmasRegex) const { - return getStringSplit(Line.substr(TailOffset), - StartColumn + Prefix.size() + Postfix.size(), - ColumnLimit, Style.TabWidth, Encoding); +BreakableToken::Split BreakableStringLiteral::getSplit( + unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const { + return getStringSplit(Line.substr(TailOffset), ContentStartColumn, + ColumnLimit - Postfix.size(), Style.TabWidth, Encoding); } void BreakableStringLiteral::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { + WhitespaceManager &Whitespaces) const { Whitespaces.replaceWhitespaceInToken( Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, Prefix, InPPDirective, 1, StartColumn); @@ -241,19 +243,19 @@ BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, + unsigned ColumnLimit, unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const { // Don't break lines matching the comment pragmas regex. if (CommentPragmasRegex.match(Content[LineIndex])) return Split(StringRef::npos, 0); return getCommentSplit(Content[LineIndex].substr(TailOffset), - getContentStartColumn(LineIndex, TailOffset), - ColumnLimit, Style.TabWidth, Encoding); + ContentStartColumn, ColumnLimit, Style.TabWidth, + Encoding); } -void BreakableComment::compressWhitespace(unsigned LineIndex, - unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { +void BreakableComment::compressWhitespace( + unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) const { StringRef Text = Content[LineIndex].substr(TailOffset); // Text is relative to the content line, but Whitespaces operates relative to // the start of the corresponding token, so compute the start of the Split @@ -267,44 +269,6 @@ /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); } -BreakableToken::Split -BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix, - unsigned PreviousEndColumn, - unsigned ColumnLimit) const { - unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size(); - StringRef TrimmedText = Text.rtrim(Blanks); - // This is the width of the resulting line in case the full line of Text gets - // reflown up starting at ReflowStartColumn. - unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs( - TrimmedText, ReflowStartColumn, - Style.TabWidth, Encoding); - // If the full line fits up, we return a reflow split after it, - // otherwise we compute the largest piece of text that fits after - // ReflowStartColumn. - Split ReflowSplit = - FullWidth <= ColumnLimit - ? Split(TrimmedText.size(), Text.size() - TrimmedText.size()) - : getCommentSplit(Text, ReflowStartColumn, ColumnLimit, - Style.TabWidth, Encoding); - - // We need to be extra careful here, because while it's OK to keep a long line - // if it can't be broken into smaller pieces (like when the first word of a - // long line is longer than the column limit), it's not OK to reflow that long - // word up. So we recompute the size of the previous line after reflowing and - // only return the reflow split if that's under the line limit. - if (ReflowSplit.first != StringRef::npos && - // Check if the width of the newly reflown line is under the limit. - PreviousEndColumn + ReflowPrefix.size() + - encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first), - PreviousEndColumn + - ReflowPrefix.size(), - Style.TabWidth, Encoding) <= - ColumnLimit) { - return ReflowSplit; - } - return Split(StringRef::npos, 0); -} - const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const { return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok; } @@ -501,30 +465,39 @@ IndentDelta; } -unsigned BreakableBlockComment::getLineLengthAfterSplit( - unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const { - unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); +unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex, + unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const { unsigned LineLength = - ContentStartColumn + encoding::columnWidthWithTabs( - Content[LineIndex].substr(TailOffset, Length), - ContentStartColumn, Style.TabWidth, Encoding); + encoding::columnWidthWithTabs(Content[LineIndex].substr(Offset, Length), + StartColumn, Style.TabWidth, Encoding); + // FIXME: Test that this is handled correctly for Length != npos: + // npos includes a possible postfix, but for Length != npos we want the + // precise size of the content, not including any possible postfix. // The last line gets a "*/" postfix. if (LineIndex + 1 == Lines.size()) { LineLength += 2; // We never need a decoration when breaking just the trailing "*/" postfix. // Note that checking that Length == 0 is not enough, since Length could // also be StringRef::npos. - if (Content[LineIndex].substr(TailOffset, Length).empty()) { + if (Content[LineIndex].substr(Offset, Length).empty()) { LineLength -= Decoration.size(); } } return LineLength; } +unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex, + bool Break) const { + if (Break) + return IndentAtLineBreak; + return std::max(0, ContentColumn[LineIndex]); +} + void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { + WhitespaceManager &Whitespaces) const { StringRef Text = Content[LineIndex].substr(TailOffset); StringRef Prefix = Decoration; // We need this to account for the case when we have a decoration "* " for all @@ -550,59 +523,14 @@ /*Spaces=*/LocalIndentAtLineBreak - Prefix.size()); } -BreakableToken::Split BreakableBlockComment::getSplitBefore( - unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, - llvm::Regex &CommentPragmasRegex) const { +BreakableToken::Split +BreakableBlockComment::getReflowSplit(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const { if (!mayReflow(LineIndex, CommentPragmasRegex)) return Split(StringRef::npos, 0); - StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); - Split Result = getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn, - ColumnLimit); - // Result is relative to TrimmedContent. Adapt it relative to - // Content[LineIndex]. - if (Result.first != StringRef::npos) - Result.first += Content[LineIndex].size() - TrimmedContent.size(); - return Result; -} -unsigned -BreakableBlockComment::getReflownColumn(StringRef Content, unsigned LineIndex, - unsigned PreviousEndColumn) const { - unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); - // If this is the last line, it will carry around its '*/' postfix. - unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0); - // The line is composed of previous text, reflow prefix, reflown text and - // postfix. - unsigned ReflownColumn = StartColumn + - encoding::columnWidthWithTabs( - Content, StartColumn, Style.TabWidth, Encoding) + - PostfixLength; - return ReflownColumn; -} - -unsigned BreakableBlockComment::getLineLengthAfterSplitBefore( - unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn, - unsigned ColumnLimit, Split SplitBefore) const { - if (SplitBefore.first == StringRef::npos || - // Block comment line contents contain the trailing whitespace after the - // decoration, so the need of left trim. Note that this behavior is - // consistent with the breaking of block comments where the indentation of - // a broken line is uniform across all the lines of the block comment. - SplitBefore.first + SplitBefore.second < - Content[LineIndex].ltrim().size()) { - // A piece of line, not the whole, gets reflown. - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); - } else { - // The whole line gets reflown, need to check if we need to insert a break - // for the postfix or not. - StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); - unsigned ReflownColumn = - getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); - if (ReflownColumn <= ColumnLimit) { - return ReflownColumn; - } - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); - } + size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); + return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); } bool BreakableBlockComment::introducesBreakBeforeToken() const { @@ -611,49 +539,39 @@ Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos; } -void BreakableBlockComment::replaceWhitespaceBefore( - unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, - Split SplitBefore, WhitespaceManager &Whitespaces) { +void BreakableBlockComment::reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const { + StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); + // Here we need to reflow. + assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && + "Reflowing whitespace within a token"); + // This is the offset of the end of the last line relative to the start of + // the token text in the token. + unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + + Content[LineIndex - 1].size() - + tokenAt(LineIndex).TokenText.data(); + unsigned WhitespaceLength = TrimmedContent.data() - + tokenAt(LineIndex).TokenText.data() - + WhitespaceOffsetInToken; + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), WhitespaceOffsetInToken, + /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", + /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0, + /*Spaces=*/0); +} + +void BreakableBlockComment::adaptStartOfLine( + unsigned LineIndex, WhitespaceManager &Whitespaces) const { if (LineIndex == 0) { if (DelimitersOnNewline) { - // Since we're breaking af index 1 below, the break position and the + // Since we're breaking at index 1 below, the break position and the // break length are the same. size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks); - if (BreakLength != StringRef::npos) { + if (BreakLength != StringRef::npos) insertBreak(LineIndex, 0, Split(1, BreakLength), Whitespaces); - DelimitersOnNewline = true; - } } return; } - StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); - if (SplitBefore.first != StringRef::npos) { - // Here we need to reflow. - assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && - "Reflowing whitespace within a token"); - // This is the offset of the end of the last line relative to the start of - // the token text in the token. - unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + - Content[LineIndex - 1].size() - - tokenAt(LineIndex).TokenText.data(); - unsigned WhitespaceLength = TrimmedContent.data() - - tokenAt(LineIndex).TokenText.data() - - WhitespaceOffsetInToken; - Whitespaces.replaceWhitespaceInToken( - tokenAt(LineIndex), WhitespaceOffsetInToken, - /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", - /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0, - /*Spaces=*/0); - // Check if we need to also insert a break at the whitespace range. - // Note that we don't need a penalty for this break, since it doesn't change - // the total number of lines. - unsigned ReflownColumn = - getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); - if (ReflownColumn > ColumnLimit) - insertBreak(LineIndex, 0, SplitBefore, Whitespaces); - return; - } - // Here no reflow with the previous line will happen. // Fix the decoration of the line at LineIndex. StringRef Prefix = Decoration; @@ -689,8 +607,7 @@ } BreakableToken::Split -BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset, - unsigned ColumnLimit) const { +BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const { if (DelimitersOnNewline) { // Replace the trailing whitespace of the last line with a newline. // In case the last line is empty, the ending '*/' is already on its own @@ -716,15 +633,6 @@ !switchesFormatting(tokenAt(LineIndex)); } -unsigned -BreakableBlockComment::getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const { - // If we break, we always break at the predefined indent. - if (TailOffset != 0) - return IndentAtLineBreak; - return std::max(0, ContentColumn[LineIndex]); -} - BreakableLineCommentSection::BreakableLineCommentSection( const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, @@ -813,20 +721,25 @@ } } -unsigned BreakableLineCommentSection::getLineLengthAfterSplit( - unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const { - unsigned ContentStartColumn = - (TailOffset == 0 ? ContentColumn[LineIndex] - : OriginalContentColumn[LineIndex]); - return ContentStartColumn + encoding::columnWidthWithTabs( - Content[LineIndex].substr(TailOffset, Length), - ContentStartColumn, Style.TabWidth, Encoding); +unsigned +BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const { + return encoding::columnWidthWithTabs( + Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth, + Encoding); } -void BreakableLineCommentSection::insertBreak(unsigned LineIndex, - unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { +unsigned BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, + bool Break) const { + if (Break) + return OriginalContentColumn[LineIndex]; + return ContentColumn[LineIndex]; +} + +void BreakableLineCommentSection::insertBreak( + unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) const { StringRef Text = Content[LineIndex].substr(TailOffset); // Compute the offset of the split relative to the beginning of the token // text. @@ -845,34 +758,42 @@ /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size()); } -BreakableComment::Split BreakableLineCommentSection::getSplitBefore( - unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, - llvm::Regex &CommentPragmasRegex) const { +BreakableComment::Split BreakableLineCommentSection::getReflowSplit( + unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { if (!mayReflow(LineIndex, CommentPragmasRegex)) return Split(StringRef::npos, 0); - return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn, - ColumnLimit); -} - -unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore( - unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn, - unsigned ColumnLimit, Split SplitBefore) const { - if (SplitBefore.first == StringRef::npos || - SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { - // A piece of line, not the whole line, gets reflown. - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); - } else { - // The whole line gets reflown. - unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); - return StartColumn + - encoding::columnWidthWithTabs(Content[LineIndex], StartColumn, - Style.TabWidth, Encoding); - } -} -void BreakableLineCommentSection::replaceWhitespaceBefore( - unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, - Split SplitBefore, WhitespaceManager &Whitespaces) { + size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); + + // In a line comment section each line is a separate token; thus, after a + // split we replace all whitespace before the current line comment token + // (which does not need to be included in the split), plus the start of the + // line up to where the content starts. + return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); +} + +void BreakableLineCommentSection::reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const { + // Reflow happens between tokens. Replace the whitespace between the + // tokens by the empty string. + Whitespaces.replaceWhitespace( + *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, + /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false); + // Replace the indent and prefix of the token with the reflow prefix. + unsigned WhitespaceLength = + Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); + Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], + /*Offset=*/0, + /*ReplaceChars=*/WhitespaceLength, + /*PreviousPostfix=*/"", + /*CurrentPrefix=*/ReflowPrefix, + /*InPPDirective=*/false, + /*Newlines=*/0, + /*Spaces=*/0); +} + +void BreakableLineCommentSection::adaptStartOfLine( + unsigned LineIndex, WhitespaceManager &Whitespaces) const { // If this is the first line of a token, we need to inform Whitespace Manager // about it: either adapt the whitespace range preceding it, or mark it as an // untouchable token. @@ -880,44 +801,25 @@ // // line 1 \ // // line 2 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { - if (SplitBefore.first != StringRef::npos) { - // Reflow happens between tokens. Replace the whitespace between the - // tokens by the empty string. - Whitespaces.replaceWhitespace( - *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, - /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false); - // Replace the indent and prefix of the token with the reflow prefix. - unsigned WhitespaceLength = - Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); - Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], - /*Offset=*/0, - /*ReplaceChars=*/WhitespaceLength, - /*PreviousPostfix=*/"", - /*CurrentPrefix=*/ReflowPrefix, - /*InPPDirective=*/false, - /*Newlines=*/0, - /*Spaces=*/0); - } else { - // This is the first line for the current token, but no reflow with the - // previous token is necessary. However, we still may need to adjust the - // start column. Note that ContentColumn[LineIndex] is the expected - // content column after a possible update to the prefix, hence the prefix - // length change is included. - unsigned LineColumn = - ContentColumn[LineIndex] - - (Content[LineIndex].data() - Lines[LineIndex].data()) + - (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size()); - - // We always want to create a replacement instead of adding an untouchable - // token, even if LineColumn is the same as the original column of the - // token. This is because WhitespaceManager doesn't align trailing - // comments if they are untouchable. - Whitespaces.replaceWhitespace(*Tokens[LineIndex], - /*Newlines=*/1, - /*Spaces=*/LineColumn, - /*StartOfTokenColumn=*/LineColumn, - /*InPPDirective=*/false); - } + // This is the first line for the current token, but no reflow with the + // previous token is necessary. However, we still may need to adjust the + // start column. Note that ContentColumn[LineIndex] is the expected + // content column after a possible update to the prefix, hence the prefix + // length change is included. + unsigned LineColumn = + ContentColumn[LineIndex] - + (Content[LineIndex].data() - Lines[LineIndex].data()) + + (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size()); + + // We always want to create a replacement instead of adding an untouchable + // token, even if LineColumn is the same as the original column of the + // token. This is because WhitespaceManager doesn't align trailing + // comments if they are untouchable. + Whitespaces.replaceWhitespace(*Tokens[LineIndex], + /*Newlines=*/1, + /*Spaces=*/LineColumn, + /*StartOfTokenColumn=*/LineColumn, + /*InPPDirective=*/false); } if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) { // Adjust the prefix if necessary. @@ -930,13 +832,6 @@ tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "", /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); } - // Add a break after a reflow split has been introduced, if necessary. - // Note that this break doesn't need to be penalized, since it doesn't change - // the number of lines. - if (SplitBefore.first != StringRef::npos && - SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { - insertBreak(LineIndex, 0, SplitBefore, Whitespaces); - } } void BreakableLineCommentSection::updateNextToken(LineState &State) const { @@ -953,20 +848,17 @@ if (Lines[LineIndex].startswith("//")) { IndentContent = Lines[LineIndex].substr(2); } + // FIXME: Decide whether we want to reflow non-regular indents: + // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the + // OriginalPrefix[LineIndex-1]. That means we don't reflow + // // text that protrudes + // // into text with different indent + // We do reflow in that case in block comments. return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && mayReflowContent(Content[LineIndex]) && !Tok.Finalized && !switchesFormatting(tokenAt(LineIndex)) && OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]; } -unsigned -BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const { - if (TailOffset != 0) { - return OriginalContentColumn[LineIndex]; - } - return ContentColumn[LineIndex]; -} - } // namespace format } // namespace clang Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -1484,23 +1484,24 @@ LineState &State, bool AllowBreak, bool DryRun) { - std::unique_ptr Token = + std::unique_ptr Token = createBreakableToken(Current, State, AllowBreak); if (!Token) return 0; unsigned ColumnLimit = getColumnLimit(State); - unsigned StartColumn = State.Column - Current.ColumnWidth; if (Current.is(TT_LineComment)) { // We don't insert backslashes when breaking line comments. ColumnLimit = Style.ColumnLimit; } if (Current.UnbreakableTailLength >= ColumnLimit) return 0; - + // ColumnWidth was already accounted into State.Column before calling + // breakProtrudingToken. + unsigned StartColumn = State.Column - Current.ColumnWidth; unsigned NewBreakPenalty = Current.isStringLiteral() ? Style.PenaltyBreakString : Style.PenaltyBreakComment; - unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; + // Stores whether we introduce a break anywhere in the token. bool BreakInserted = Token->introducesBreakBeforeToken(); // Store whether we inserted a new line break at the end of the previous // logical line. @@ -1508,145 +1509,258 @@ // We use a conservative reflowing strategy. Reflow starts after a line is // broken or the corresponding whitespace compressed. Reflow ends as soon as a // line that doesn't get reflown with the previous line is reached. - bool ReflowInProgress = false; - unsigned Penalty = 0; - unsigned RemainingTokenColumns = 0; + bool Reflow = false; + // Keep track of where we are in the token: + // Where we are in the content of the current logical line. unsigned TailOffset = 0; + // The number of columns left in the current logical line after TailOffset. + unsigned RemainingTokenColumns = 0; + // The column number we're currently at. + unsigned ContentStartColumn = 0; + + unsigned Penalty = 0; DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn << ".\n"); for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); LineIndex != EndIndex; ++LineIndex) { - DEBUG(llvm::dbgs() << " Line: " << LineIndex - << " (Reflow: " << ReflowInProgress << ")\n"); - BreakableToken::Split SplitBefore(StringRef::npos, 0); - if (ReflowInProgress) { - SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns, - RemainingSpace, CommentPragmasRegex); + DEBUG(llvm::dbgs() << " Line: " << LineIndex << " (Reflow: " << Reflow + << ")\n"); + if (Reflow) { + // The previous iteration requested that we try to reflow. + // Here, we will check whether we can actually reflow. + Reflow = false; + // ContentStartColumn is either + // - at the start of the line, directly after a break + // - the end of the last line +1, when continuing a reflow over multiple + // lines. + // Add the remaining length of the tokens after the last break or + // continuation to get our current start position for the reflow. + ContentStartColumn += RemainingTokenColumns; + // Get the split that we need to reflow current logical line into the end + // of the previous one; the split will include any leading whitespace of + // the current logical line. + BreakableToken::Split SplitBefore = + Token->getReflowSplit(LineIndex, CommentPragmasRegex); + DEBUG(llvm::dbgs() << " Size of reflown text: " << ContentStartColumn + << "\n Potential reflow split: "); + if (SplitBefore.first != StringRef::npos) { + DEBUG(llvm::dbgs() << SplitBefore.first << ", " << SplitBefore.second + << "\n"); + TailOffset = SplitBefore.first + SplitBefore.second; + // If the rest of the current line fits into the previous line below + // the column limit, we can safely reflow. + RemainingTokenColumns = Token->getRangeLength( + LineIndex, TailOffset, StringRef::npos, ContentStartColumn); + Reflow = true; + if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) { + DEBUG(llvm::dbgs() << " Over limit after reflow, need: " + << (ContentStartColumn + RemainingTokenColumns) + << ", space: " << ColumnLimit + << ", reflown prefix: " << ContentStartColumn + << ", offset in line: " << TailOffset << "\n"); + // If the whole current line does not fit, try to find a point in the + // current line at which we can break so that attaching the part of + // the current line to that break point onto the previous line is + // below the column limit. + BreakableToken::Split Split = + Token->getSplit(LineIndex, TailOffset, ColumnLimit, + ContentStartColumn, CommentPragmasRegex); + if (Split.first == StringRef::npos) { + DEBUG(llvm::dbgs() << " Did not find later break\n"); + Reflow = false; + } else { + // Check whether the first split point gets us below the column + // limit. Note that we will execute this later split below as + // part of the normal token breaking and reflow logic within the + // line. + unsigned ToSplitColumns = Token->getRangeLength( + LineIndex, TailOffset, Split.first, ContentStartColumn); + if (ContentStartColumn + ToSplitColumns > ColumnLimit) { + DEBUG(llvm::dbgs() << " Next split protrudes, need: " + << (ContentStartColumn + ToSplitColumns) + << ", space: " << ColumnLimit); + unsigned ExcessCharactersPenalty = + (ContentStartColumn + ToSplitColumns - ColumnLimit) * + Style.PenaltyExcessCharacter; + if (NewBreakPenalty < ExcessCharactersPenalty) { + Reflow = false; + } + } + } + } + } else { + DEBUG(llvm::dbgs() << "not found.\n"); + } } - ReflowInProgress = SplitBefore.first != StringRef::npos; - DEBUG({ - if (ReflowInProgress) - llvm::dbgs() << " Reflowing.\n"; - }); - TailOffset = - ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0; - // If we found a reflow split and have added a new break before this line, - // we are going to remove the line break at the start of the next logical - // line. - // For example, here we'll add a new line break after 'text', and - // subsequently delete the line break between 'that' and 'reflows'. - // // some text that - // // reflows - // -> - // // some text - // // that reflows - // When adding the line break, we also added the penalty for it, so we need - // to subtract that penalty again when we remove the line break due to - // reflowing. - if (ReflowInProgress && NewBreakBefore) { - assert(Penalty >= NewBreakPenalty); - Penalty -= NewBreakPenalty; + if (Reflow) { + // If we found a reflow split and have added a new break before this + // line, we are going to remove the line break at the start of the next + // logical line. For example, here we'll add a new line break after + // 'text', and subsequently delete the line break between 'that' and + // 'reflows'. + // // some text that + // // reflows + // -> + // // some text + // // that reflows + // When adding the line break, we also added the penalty for it, so we + // need to subtract that penalty again when we remove the line break due + // to reflowing. + if (NewBreakBefore) { + assert(Penalty >= NewBreakPenalty); + Penalty -= NewBreakPenalty; + } + // When we reflow, we need to add a space between the end of the last + // line and the current line's start column. + ++ContentStartColumn; + if (!DryRun) + Token->reflow(LineIndex, Whitespaces); + } else { + // If we didn't reflow into this line, the only space to consider is the + // current logical line. + TailOffset = 0; + ContentStartColumn = + Token->getContentStartColumn(LineIndex, /*Break=*/false); + RemainingTokenColumns = Token->getRangeLength( + LineIndex, TailOffset, StringRef::npos, ContentStartColumn); + // Adapt the start of the token, for example indent. + if (!DryRun) + Token->adaptStartOfLine(LineIndex, Whitespaces); } + NewBreakBefore = false; - if (!DryRun) - Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, - RemainingSpace, SplitBefore, Whitespaces); - RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( - LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore); - while (RemainingTokenColumns > RemainingSpace) { - DEBUG(llvm::dbgs() << " Over limit, need: " << RemainingTokenColumns - << ", space: " << RemainingSpace << "\n"); - BreakableToken::Split Split = Token->getSplit( - LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex); + // Break the current token until we can fit the rest of the line. + while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) { + DEBUG(llvm::dbgs() << " Over limit, need: " + << (ContentStartColumn + RemainingTokenColumns) + << ", space: " << ColumnLimit + << ", reflown prefix: " << ContentStartColumn + << ", offset in line: " << TailOffset << "\n"); + // If the current token doesn't fit, find the latest possible split in the + // current line so that breaking at it will be under the column limit. + // FIXME: Use the earliest possible split while reflowing to correctly + // compress whitespace within a line. + BreakableToken::Split Split = + Token->getSplit(LineIndex, TailOffset, ColumnLimit, + ContentStartColumn, CommentPragmasRegex); if (Split.first == StringRef::npos) { - // The last line's penalty is handled in addNextStateToQueue(). + // No break opportunity - update the penalty and continue with the next + // logical line. if (LineIndex < EndIndex - 1) + // The last line's penalty is handled in addNextStateToQueue(). Penalty += Style.PenaltyExcessCharacter * - (RemainingTokenColumns - RemainingSpace); + (ContentStartColumn + RemainingTokenColumns - ColumnLimit); DEBUG(llvm::dbgs() << " No break opportunity.\n"); break; } assert(Split.first != 0); - // Check if compressing the whitespace range will bring the line length - // under the limit. If that is the case, we perform whitespace compression - // instead of inserting a line break. - unsigned RemainingTokenColumnsAfterCompression = - Token->getLineLengthAfterCompression(RemainingTokenColumns, Split); - if (RemainingTokenColumnsAfterCompression <= RemainingSpace) { - RemainingTokenColumns = RemainingTokenColumnsAfterCompression; - ReflowInProgress = true; - if (!DryRun) - Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); - DEBUG(llvm::dbgs() << " Compressing below limit.\n"); - break; - } - - // Compute both the penalties for: - // - not breaking, and leaving excess characters - // - adding a new line break - assert(RemainingTokenColumnsAfterCompression > RemainingSpace); - unsigned ExcessCharactersPenalty = - (RemainingTokenColumnsAfterCompression - RemainingSpace) * - Style.PenaltyExcessCharacter; - - unsigned BreakPenalty = NewBreakPenalty; - unsigned ColumnsUsed = - Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); - if (ColumnsUsed > ColumnLimit) - BreakPenalty += - Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit); - - DEBUG(llvm::dbgs() << " Penalty excess: " << ExcessCharactersPenalty - << "\n break : " << BreakPenalty << "\n"); - // Only continue to add the line break if the penalty of the excess - // characters is larger than the penalty of the line break. - // FIXME: This does not take into account when we can later remove the - // line break again due to a reflow. - if (ExcessCharactersPenalty < BreakPenalty) { - if (!DryRun) - Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); - // Do not set ReflowInProgress: we do not have any space left to - // reflow into. - Penalty += ExcessCharactersPenalty; - break; + if (!Current.isStringLiteral()) { + // Check whether the next natural split point after the current one + // introduces can still fit the line, either because we can compress + // away whitespace, or because the penalty the excess characters + // introduce is lower than the break penalty. + // As we break string literals on any position and never want to change + // their content, we only do this for comments. + unsigned ToSplitColumns = Token->getRangeLength( + LineIndex, TailOffset, Split.first, ContentStartColumn); + + BreakableToken::Split NextSplit(StringRef::npos, 0); + NextSplit = Token->getSplit( + LineIndex, TailOffset + Split.first + Split.second, ColumnLimit, + ContentStartColumn + ToSplitColumns, CommentPragmasRegex); + // Compute the comlumns necessary to fit the next non-breakable sequence + // into the current line. + unsigned ToNextSplitColumns = Token->getRangeLength( + LineIndex, TailOffset, + NextSplit.first != StringRef::npos + ? Split.first + Split.second + NextSplit.first + : StringRef::npos, + ContentStartColumn); + // Compress the whitespace between the break and the start of the next + // unbreakable sequence. + ToNextSplitColumns = + Token->getLineLengthAfterCompression(ToNextSplitColumns, Split); + // If the whitespace compression makes us fit, continue on the current + // line. + bool ContinueOnLine = + ContentStartColumn + ToNextSplitColumns <= ColumnLimit; + unsigned ExcessCharactersPenalty = 0; + if (!ContinueOnLine) { + // Similarly, if the excess characters' penalty is lower than the + // penalty of introducing a new break, continue on the current line. + ExcessCharactersPenalty = + (ContentStartColumn + ToNextSplitColumns - ColumnLimit) * + Style.PenaltyExcessCharacter; + DEBUG(llvm::dbgs() + << " Penalty excess: " << ExcessCharactersPenalty + << "\n break : " << NewBreakPenalty << "\n"); + if (ExcessCharactersPenalty < NewBreakPenalty) { + ContinueOnLine = true; + } + } + if (ContinueOnLine) { + DEBUG(llvm::dbgs() << " Continuing on line...\n"); + Reflow = true; + if (!DryRun) + Token->compressWhitespace(LineIndex, TailOffset, Split, + Whitespaces); + // When we continue on the same line, leave one space between content. + ContentStartColumn += ToSplitColumns + 1; + Penalty += ExcessCharactersPenalty; + TailOffset += Split.first + Split.second; + RemainingTokenColumns = Token->getRangeLength( + LineIndex, TailOffset, StringRef::npos, ContentStartColumn); + continue; + } } - - unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( - LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); + DEBUG(llvm::dbgs() << " Breaking...\n"); + ContentStartColumn = + Token->getContentStartColumn(LineIndex, /*Break=*/true); + unsigned NewRemainingTokenColumns = Token->getRangeLength( + LineIndex, TailOffset + Split.first + Split.second, StringRef::npos, + ContentStartColumn); // When breaking before a tab character, it may be moved by a few columns, // but will still be expanded to the next tab stop, so we don't save any // columns. - if (NewRemainingTokenColumns == RemainingTokenColumns) + if (NewRemainingTokenColumns == RemainingTokenColumns) { // FIXME: Do we need to adjust the penalty? break; + } assert(NewRemainingTokenColumns < RemainingTokenColumns); + DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first + << ", " << Split.second << "\n"); if (!DryRun) Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); - Penalty += BreakPenalty; + Penalty += NewBreakPenalty; TailOffset += Split.first + Split.second; RemainingTokenColumns = NewRemainingTokenColumns; - ReflowInProgress = true; + Reflow = true; BreakInserted = true; NewBreakBefore = true; } } BreakableToken::Split SplitAfterLastLine = - Token->getSplitAfterLastLine(TailOffset, ColumnLimit); + Token->getSplitAfterLastLine(TailOffset); if (SplitAfterLastLine.first != StringRef::npos) { DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n"); if (!DryRun) Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine, Whitespaces); - RemainingTokenColumns = Token->getLineLengthAfterSplitAfterLastLine( - TailOffset, SplitAfterLastLine); + ContentStartColumn = + Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true); + RemainingTokenColumns = Token->getRangeLength( + Token->getLineCount() - 1, + TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second, + StringRef::npos, ContentStartColumn); } - State.Column = RemainingTokenColumns; + State.Column = ContentStartColumn + RemainingTokenColumns - + Current.UnbreakableTailLength; if (BreakInserted) { // If we break the token inside a parameter list, we need to break before Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -7732,6 +7732,12 @@ format("#define A \"some text other\";", AlignLeft)); } +TEST_F(FormatTest, BreaksStringLiteralsAtColumnLimit) { + EXPECT_EQ("C a = \"some more \"\n" + " \"text\";", + format("C a = \"some more text\";", getLLVMStyleWithColumns(18))); +} + TEST_F(FormatTest, FullyRemoveEmptyLines) { FormatStyle NoEmptyLines = getLLVMStyleWithColumns(80); NoEmptyLines.MaxEmptyLinesToKeep = 0; @@ -9927,16 +9933,9 @@ Style.PenaltyExcessCharacter = 90; verifyFormat("int a; // the comment", Style); - EXPECT_EQ("int a; // the\n" - " // comment aa", + EXPECT_EQ("int a; // the comment\n" + " // aa", format("int a; // the comment aa", Style)); - EXPECT_EQ("int a; // first line\n" - " // second line\n" - " // third line", - format("int a; // first line\n" - " // second line\n" - " // third line", - Style)); EXPECT_EQ("int a; /* first line\n" " * second line\n" " * third line\n" @@ -9946,12 +9945,18 @@ " * third line\n" " */", Style)); + EXPECT_EQ("int a; // first line\n" + " // second line\n" + " // third line", + format("int a; // first line\n" + " // second line\n" + " // third line", + Style)); // FIXME: Investigate why this is not getting the same layout as the test // above. EXPECT_EQ("int a; /* first line\n" - " * second\n" - " * line third\n" - " * line\n" + " * second line\n" + " * third line\n" " */", format("int a; /* first line second line third line" "\n*/", @@ -9970,31 +9975,23 @@ // FIXME: Optimally, we'd keep bazfoo on the first line and reflow bar to the // next one. - EXPECT_EQ("// foo bar baz\n" - "// bazfoo bar foo\n" - "// bar\n", + EXPECT_EQ("// foo bar baz bazfoo\n" + "// bar foo bar\n", format("// foo bar baz bazfoo bar\n" "// foo bar\n", Style)); EXPECT_EQ("// foo bar baz bazfoo\n" - "// foo bar baz\n" - "// bazfoo bar foo\n" - "// bar\n", + "// foo bar baz bazfoo\n" + "// bar foo bar\n", format("// foo bar baz bazfoo\n" "// foo bar baz bazfoo bar\n" "// foo bar\n", Style)); - // FIXME: Optimally, we'd keep 'bar' in the last line at the end of the line, - // as it does not actually protrude far enough to make breaking pay off. - // Unfortunately, due to how reflowing is currently implemented, we already - // check the column limit after the reflowing decision and extend the reflow - // range, so we will not take whitespace compression into account. EXPECT_EQ("// foo bar baz bazfoo\n" - "// foo bar baz\n" - "// bazfoo bar foo\n" - "// bar\n", + "// foo bar baz bazfoo\n" + "// bar foo bar\n", format("// foo bar baz bazfoo\n" "// foo bar baz bazfoo bar\n" "// foo bar\n", @@ -10595,10 +10592,12 @@ "\"七 八 九 \"\n" "\"十\"", format("\"一 二 三 四 五六 七 八 九 十\"", getLLVMStyleWithColumns(11))); - EXPECT_EQ("\"一\t二 \"\n" - "\"\t三 \"\n" - "\"四 五\t六 \"\n" - "\"\t七 \"\n" + EXPECT_EQ("\"一\t\"\n" + "\"二 \t\"\n" + "\"三 四 \"\n" + "\"五\t\"\n" + "\"六 \t\"\n" + "\"七 \"\n" "\"八九十\tqq\"", format("\"一\t二 \t三 四 五\t六 \t七 八九十\tqq\"", getLLVMStyleWithColumns(11))); Index: unittests/Format/FormatTestComments.cpp =================================================================== --- unittests/Format/FormatTestComments.cpp +++ unittests/Format/FormatTestComments.cpp @@ -1096,11 +1096,12 @@ } TEST_F(FormatTestComments, SplitsLongLinesInComments) { + // FIXME: Do we need to fix up the " */" at the end? + // It doesn't look like any of our current logic triggers this. EXPECT_EQ("/* This is a long\n" " * comment that\n" - " * doesn't\n" - " * fit on one line.\n" - " */", + " * doesn't fit on\n" + " * one line. */", format("/* " "This is a long " "comment that " @@ -2102,6 +2103,66 @@ EXPECT_EQ("///", format(" /// ", getLLVMStyleWithColumns(20))); } +TEST_F(FormatTestComments, ReflowsCommentsPrecise) { + // FIXME: This assumes we do not continue compressing whitespace once we are + // in reflow mode. Consider compressing whitespace. + + // Test that we stop reflowing precisely at the column limit. + // After reflowing, "// reflows into foo" does not fit the column limit, + // so we compress the whitespace. + EXPECT_EQ("// some text that\n" + "// reflows into foo\n", + format("// some text that reflows\n" + "// into foo\n", + getLLVMStyleWithColumns(20))); + // Given one more column, "// reflows into foo" does fit the limit, so we + // do not compress the whitespace. + EXPECT_EQ("// some text that\n" + "// reflows into foo\n", + format("// some text that reflows\n" + "// into foo\n", + getLLVMStyleWithColumns(21))); +} + +TEST_F(FormatTestComments, ReflowsCommentsWithExtraWhitespace) { + // Baseline. + EXPECT_EQ("// some text\n" + "// that re flows\n", + format("// some text that\n" + "// re flows\n", + getLLVMStyleWithColumns(16))); + EXPECT_EQ("// some text\n" + "// that re flows\n", + format("// some text that\n" + "// re flows\n", + getLLVMStyleWithColumns(16))); + EXPECT_EQ("/* some text\n" + " * that re flows\n" + " */\n", + format("/* some text that\n" + "* re flows\n" + "*/\n", + getLLVMStyleWithColumns(16))); + // FIXME: We do not reflow if the indent of two subsequent lines differs; + // given that this is different behavior from block comments, do we want + // to keep this? + EXPECT_EQ("// some text\n" + "// that\n" + "// re flows\n", + format("// some text that\n" + "// re flows\n", + getLLVMStyleWithColumns(16))); + // Space within parts of a line that fit. + // FIXME: Use the earliest possible split while reflowing to compress the + // whitespace within the line. + EXPECT_EQ("// some text that\n" + "// does re flow\n" + "// more here\n", + format("// some text that does\n" + "// re flow more here\n", + getLLVMStyleWithColumns(21))); +} + TEST_F(FormatTestComments, IgnoresIf0Contents) { EXPECT_EQ("#if 0\n" "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n" @@ -2484,6 +2545,7 @@ " long */\n" " b);", format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16))); + EXPECT_EQ( "a = f(\n" " a,\n" @@ -2888,7 +2950,7 @@ getLLVMStyleWithColumns(20))); } -TEST_F(FormatTestComments, NoCrush_Bug34236) { +TEST_F(FormatTestComments, NoCrash_Bug34236) { // This is a test case from a crasher reported in: // https://bugs.llvm.org/show_bug.cgi?id=34236 // Temporarily disable formatting for readability. @@ -2896,8 +2958,7 @@ EXPECT_EQ( "/* */ /*\n" " * a\n" -" * b c\n" -" * d*/", +" * b c d*/", format( "/* */ /*\n" " * a b\n"