Index: lib/Format/BreakableToken.h =================================================================== --- lib/Format/BreakableToken.h +++ lib/Format/BreakableToken.h @@ -8,9 +8,10 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief Declares BreakableToken, BreakableStringLiteral, and -/// BreakableBlockComment classes, that contain token type-specific logic to -/// break long lines in tokens. +/// \brief Declares BreakableToken, BreakableStringLiteral, BreakableComment, +/// BreakableBlockComment and BreakableLineCommentSection classes, that contain +/// token type-specific logic to break long lines in tokens and reflow content +/// between tokens. /// //===----------------------------------------------------------------------===// @@ -29,6 +30,26 @@ /// \brief Base class for strategies on how to break tokens. /// +/// This is organised around the concept of a \c Split, which is a whitespace +/// range that signifies a position of the content of a token where a +/// reformatting might be done. Operating with splits is divided into 3 +/// operations: +/// - getSplit, for finding a split starting at a position, +/// - getLineLengthAfterSplit, for calculating the size in columns of the rest +/// of the content after a split has been used for breaking, and +/// - insertBreak, for executing the split using a whitespace manager. 
+/// +/// For tokens where the whitespace before each line needs to be also +/// reformatted, for example for tokens supporting reflow, there are analogous +/// operations that might be executed before the main line breaking occurs: +/// - getSplitBefore, for finding a split such that the content preceding it +/// needs to be specially reflown, +/// - getLineLengthAfterSplitBefore, for calculating the line length in columns +/// of the remainder of the content after the beginning of the content has +/// been reformatted, and +/// - replaceWhitespaceBefore, for executing the reflow using a whitespace +/// manager. +/// /// FIXME: The interface seems set in stone, so we might want to just pull the /// strategy into the class, instead of controlling it from the outside. class BreakableToken { @@ -42,13 +63,13 @@ virtual unsigned getLineCount() const = 0; /// \brief Returns the number of columns required to format the piece of line - /// at \p LineIndex, from byte offset \p Offset with length \p Length. + /// at \p LineIndex, from byte offset \p TailOffset with length \p Length. /// - /// Note that previous breaks are not taken into account. \p Offset is always - /// specified from the start of the (original) line. + /// Note that previous breaks are not taken into account. \p TailOffset is + /// always specified from the start of the (original) line. /// \p Length can be set to StringRef::npos, which means "to the end of line". virtual unsigned - getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset, + getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, StringRef::size_type Length) const = 0; /// \brief Returns a range (offset, length) at which to break the line at @@ -67,10 +88,51 @@ Split Split, WhitespaceManager &Whitespaces) = 0; + /// \brief Returns a whitespace range (offset, length) of the content at \p + /// LineIndex such that the content preceding this range needs to be + /// reformatted before any breaks are made to this line. 
+ /// + /// \p PreviousEndColumn is the end column of the previous line after + /// formatting. + /// + /// \p ReflowInProgress specifies whether this line may be reflown with the + /// previous line. + /// + /// A result having offset == StringRef::npos means that no piece of the line + /// needs to be reformatted before any breaks are made. + virtual Split getSplitBefore(unsigned LineIndex, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + bool ReflowInProgress) const { + return Split(StringRef::npos, 0); + } + + /// \brief Returns the number of columns required to format the piece of line + /// at \p LineIndex after the content preceding the whitespace range specified + /// by \p SplitBefore has been reformatted, but before any breaks are made to + /// this line. + virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, + unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const { + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } + /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. + /// Performs a reformatting of the content at \p LineIndex preceding the + /// whitespace range \p SplitBefore. virtual void replaceWhitespaceBefore(unsigned LineIndex, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore, WhitespaceManager &Whitespaces) {} + /// \brief Updates the next token of \p State to the next token after this + /// one. This makes sense when this token manages a set of underlying tokens + /// as a unit and is responsible for the formatting of them. + virtual void updateNextToken(LineState &State) const {} + protected: BreakableToken(const FormatToken &Tok, unsigned IndentLevel, bool InPPDirective, encoding::Encoding Encoding, @@ -130,94 +192,139 @@ WhitespaceManager &Whitespaces) override {} }; -class BreakableLineComment : public BreakableSingleLineToken { -public: - /// \brief Creates a breakable token for a line comment. 
+class BreakableComment : public BreakableToken { +protected: + /// \brief Creates a breakable token for a comment. /// /// \p StartColumn specifies the column in which the comment will start - /// after formatting. - BreakableLineComment(const FormatToken &Token, unsigned IndentLevel, - unsigned StartColumn, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style); + /// after formatting, while \p OriginalStartColumn specifies in which + /// column the comment started before formatting. + /// If the comment starts a line after formatting, set \p FirstInLine to true. + BreakableComment(const FormatToken &Token, unsigned IndentLevel, + unsigned StartColumn, unsigned OriginalStartColumn, + bool FirstInLine, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style); +public: + unsigned getLineCount() const override; Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit) const override; - void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; - void replaceWhitespaceBefore(unsigned LineIndex, - WhitespaceManager &Whitespaces) override; +protected: + virtual unsigned getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const = 0; -private: - // The prefix without an additional space if one was added. - StringRef OriginalPrefix; + // Returns a split that divides Text into a left and right parts, such that + // the left part is suitable for reflowing after PreviousEndColumn. + Split getReflowSplit(StringRef Text, StringRef ReflowPrefix, + unsigned PreviousEndColumn, unsigned ColumnLimit) const; + + // Returns the token containing the line at LineIndex. + const FormatToken &tokenAt(unsigned LineIndex) const; + + // Checks if the content of line LineIndex may be reflown with the previous + // line. 
+ bool mayReflow(unsigned LineIndex, bool ReflowInProgress) const; + + // Contains the original text of the lines of the block comment. + // + // In case of block comments, excludes the leading /* in the first line and + // trailing */ in the last line. In case of line comments, excludes the + // leading // and spaces. + SmallVector Lines; + + // Contains the text of the lines excluding all leading and trailing + // whitespace between the lines. Note that the decoration (if present) is also + // not considered part of the text. + SmallVector Content; + + // Tokens[i] contains a reference to the token containing Lines[i] if the + // whitespace range before that token is managed by this block. + // Otherwise, Tokens[i] is a null pointer. + SmallVector Tokens; + + // ContentColumn[i] is the target column at which Content[i] should be. + // Note that this excludes a leading "* " or "*" in case of block comments + // where all lines have a "*" prefix, or the leading "// " or "//" in case of + // line comments. + // + // In block comments, the first line's target column is always positive. The + // remaining lines' target columns are relative to the first line to allow + // correct indentation of comments in \c WhitespaceManager. Thus they can be + // negative as well (in case the first line needs to be unindented more than + // there's actual whitespace in another line). + SmallVector ContentColumn; + + // The intended start column of the first line of text from this section. + unsigned StartColumn; + + // The original start column of the first line of text from this section. + unsigned OriginalStartColumn; + + // Whether the first token of this section is the first token in its unwrapped + // line. + bool FirstInLine; + + // In case of line comments, holds the original prefix, including trailing + // whitespace. + SmallVector OriginalPrefix; + + // The prefix to use in front of a line that has been reflown up. 
+ // For example, when reflowing the second line after the first here: + // // comment 1 + // // comment 2 + // we expect: + // // comment 1 comment 2 + // and not: + // // comment 1comment 2 + StringRef ReflowPrefix = " "; }; -class BreakableBlockComment : public BreakableToken { +class BreakableBlockComment : public BreakableComment { public: - /// \brief Creates a breakable token for a block comment. - /// - /// \p StartColumn specifies the column in which the comment will start - /// after formatting, while \p OriginalStartColumn specifies in which - /// column the comment started before formatting. - /// If the comment starts a line after formatting, set \p FirstInLine to true. BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel, - unsigned StartColumn, unsigned OriginaStartColumn, + unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); - unsigned getLineCount() const override; - unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, + unsigned getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset, StringRef::size_type Length) const override; - Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; - void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - void replaceWhitespaceBefore(unsigned LineIndex, + Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, + bool ReflowInProgress) const override; + unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, + unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const override; + void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned 
ColumnLimit, + Split SplitBefore, WhitespaceManager &Whitespaces) override; private: - // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex], - // so that all whitespace between the lines is accounted to Lines[LineIndex] - // as leading whitespace: - // - Lines[LineIndex] points to the text after that whitespace - // - Lines[LineIndex-1] shrinks by its trailing whitespace - // - LeadingWhitespace[LineIndex] is updated with the complete whitespace - // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex] + // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex]. + // + // Updates Content[LineIndex-1] and Content[LineIndex] by stripping off + // leading and trailing whitespace. // - // Sets StartOfLineColumn to the intended column in which the text at + // Sets ContentColumn to the intended column in which the text at // Lines[LineIndex] starts (note that the decoration, if present, is not // considered part of the text). void adjustWhitespace(unsigned LineIndex, int IndentDelta); - // Returns the column at which the text in line LineIndex starts, when broken - // at TailOffset. Note that the decoration (if present) is not considered part - // of the text. - unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const; + // Computes the end column if the full Content from LineIndex gets reflown + // after PreviousEndColumn. + unsigned getReflownColumn(StringRef Content, + unsigned LineIndex, + unsigned PreviousEndColumn) const; - // Contains the text of the lines of the block comment, excluding the leading - // /* in the first line and trailing */ in the last line, and excluding all - // trailing whitespace between the lines. Note that the decoration (if - // present) is also not considered part of the text. - SmallVector Lines; - - // LeadingWhitespace[i] is the number of characters regarded as whitespace in - // front of Lines[i]. 
Note that this can include "* " sequences, which we - // regard as whitespace when all lines have a "*" prefix. - SmallVector LeadingWhitespace; - - // StartOfLineColumn[i] is the target column at which Line[i] should be. - // Note that this excludes a leading "* " or "*" in case all lines have - // a "*" prefix. - // The first line's target column is always positive. The remaining lines' - // target columns are relative to the first line to allow correct indentation - // of comments in \c WhitespaceManager. Thus they can be negative as well (in - // case the first line needs to be unindented more than there's actual - // whitespace in another line). - SmallVector StartOfLineColumn; + unsigned getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const override; // The column at which the text of a broken line should start. // Note that an optional decoration would go before that column. @@ -239,6 +346,54 @@ StringRef Decoration; }; +class BreakableLineCommentSection : public BreakableComment { +public: + BreakableLineCommentSection(const FormatToken &Token, unsigned IndentLevel, + unsigned StartColumn, + unsigned OriginalStartColumn, bool FirstInLine, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); + + unsigned getLineLengthAfterSplit(unsigned LineIndex, + unsigned TailOffset, + StringRef::size_type Length) const override; + void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) override; + Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, + bool ReflowInProgress) const override; + unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const override; + void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, + unsigned ColumnLimit, Split SplitBefore, + WhitespaceManager &Whitespaces) override; + void 
updateNextToken(LineState& State) const override; + +private: + unsigned getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const override; + + // Prefix[i] contains the intended leading "//" with trailing spaces to + // account for the indentation of content within the comment at line i after + // formatting. It can be different than the original prefix when the original + // line starts like this: + // //content + // Then the original prefix is "//", but the prefix is "// ". + SmallVector Prefix; + + SmallVector OriginalContentColumn; + + /// \brief The token to which the last line of this breakable token belongs + /// to; nullptr if that token is the initial token. + /// + /// The distinction is because if the token of the last line of this breakable + /// token is distinct from the initial token, this breakable token owns the + /// whitespace before the token of the last line, and the whitespace manager + /// must be able to modify it. + FormatToken *LastLineTok = nullptr; +}; } // namespace format } // namespace clang Index: lib/Format/BreakableToken.cpp =================================================================== --- lib/Format/BreakableToken.cpp +++ lib/Format/BreakableToken.cpp @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// #include "BreakableToken.h" -#include "Comments.h" +#include "ContinuationIndenter.h" #include "clang/Basic/CharInfo.h" #include "clang/Format/Format.h" #include "llvm/ADT/STLExtras.h" @@ -40,6 +40,21 @@ } } +static StringRef getLineCommentIndentPrefix(StringRef Comment) { + static const char *const KnownPrefixes[] = {"///", "//", "//!"}; + StringRef LongestPrefix; + for (StringRef KnownPrefix : KnownPrefixes) { + if (Comment.startswith(KnownPrefix)) { + size_t PrefixLength = KnownPrefix.size(); + while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') + ++PrefixLength; + if (PrefixLength > LongestPrefix.size()) + LongestPrefix = Comment.substr(0, 
PrefixLength); + } + } + return LongestPrefix; +} + static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, @@ -135,9 +150,10 @@ unsigned BreakableSingleLineToken::getLineCount() const { return 1; } unsigned BreakableSingleLineToken::getLineLengthAfterSplit( - unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { + unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const { return StartColumn + Prefix.size() + Postfix.size() + - encoding::columnWidthWithTabs(Line.substr(Offset, Length), + encoding::columnWidthWithTabs(Line.substr(TailOffset, Length), StartColumn + Prefix.size(), Style.TabWidth, Encoding); } @@ -183,71 +199,112 @@ Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces); } -BreakableLineComment::BreakableLineComment( - const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, - bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableSingleLineToken(Token, IndentLevel, StartColumn, - getLineCommentIndentPrefix(Token.TokenText), "", - InPPDirective, Encoding, Style) { - OriginalPrefix = Prefix; - if (Token.TokenText.size() > Prefix.size() && - isAlphanumeric(Token.TokenText[Prefix.size()])) { - if (Prefix == "//") - Prefix = "// "; - else if (Prefix == "///") - Prefix = "/// "; - else if (Prefix == "//!") - Prefix = "//! 
"; - } +BreakableComment::BreakableComment(const FormatToken &Token, + unsigned IndentLevel, unsigned StartColumn, + unsigned OriginalStartColumn, + bool FirstInLine, bool InPPDirective, + encoding::Encoding Encoding, + const FormatStyle &Style) + : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style), + StartColumn(StartColumn), OriginalStartColumn(OriginalStartColumn), + FirstInLine(FirstInLine) {} + +unsigned BreakableComment::getLineCount() const { return Lines.size(); } + +BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex, + unsigned TailOffset, + unsigned ColumnLimit) const { + return getCommentSplit(Content[LineIndex].substr(TailOffset), + getContentStartColumn(LineIndex, TailOffset), + ColumnLimit, Style.TabWidth, Encoding); +} + +void BreakableComment::replaceWhitespace(unsigned LineIndex, + unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) { + StringRef Text = Content[LineIndex].substr(TailOffset); + unsigned BreakOffsetInToken = + Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "", + /*InPPDirective=*/false, + /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); } BreakableToken::Split -BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { - return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), - ColumnLimit, Style.TabWidth, Encoding); +BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix, + unsigned PreviousEndColumn, + unsigned ColumnLimit) const { + unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size(); + // The width of a line if the whole Text is reflown. 
+ StringRef TrimmedText = Text.rtrim(Blanks); + unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs( + TrimmedText, ReflowStartColumn, + Style.TabWidth, Encoding); + Split ReflowSplit = + FullWidth <= ColumnLimit + ? Split(TrimmedText.size(), Text.size() - TrimmedText.size()) + : getCommentSplit(Text, ReflowStartColumn, ColumnLimit, + Style.TabWidth, Encoding); + + if (ReflowSplit.first != StringRef::npos && + PreviousEndColumn + ReflowPrefix.size() + + encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first), + PreviousEndColumn + + ReflowPrefix.size(), + Style.TabWidth, Encoding) <= + ColumnLimit) { + return ReflowSplit; + } + return Split(StringRef::npos, 0); } -void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, - Split Split, - WhitespaceManager &Whitespaces) { - Whitespaces.replaceWhitespaceInToken( - Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, - Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn); +const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const { + return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok; } -void BreakableLineComment::replaceWhitespace(unsigned LineIndex, - unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { - Whitespaces.replaceWhitespaceInToken( - Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "", - "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0, - /*Spaces=*/1); +static bool mayReflowContent(StringRef Content) { + Content = Content.trim(Blanks); + // Simple heuristic for what to reflow: content should contain at least two + // characters and either the first or second character must be + // non-punctuation. + return Content.size() > 1 && + Content != "clang-format on" && + Content != "clang-format off" && + !Content.endswith("\\") && + // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is + // true, then the first code point must be 1 byte long. 
+ (!isPunctuation(Content[0]) || !isPunctuation(Content[1])); } -void BreakableLineComment::replaceWhitespaceBefore( - unsigned LineIndex, WhitespaceManager &Whitespaces) { - if (OriginalPrefix != Prefix) { - Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", - /*InPPDirective=*/false, - /*Newlines=*/0, /*IndentLevel=*/0, - /*Spaces=*/1); - } +bool BreakableComment::mayReflow(unsigned LineIndex, + bool ReflowInProgress) const { + return ReflowInProgress && LineIndex > 0 && + mayReflowContent(Content[LineIndex]) && !Tok.Finalized && + (!Tok.is(TT_LineComment) || + OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]); } BreakableBlockComment::BreakableBlockComment( const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) { - StringRef TokenText(Token.TokenText); + : BreakableComment(Token, IndentLevel, StartColumn, OriginalStartColumn, + FirstInLine, InPPDirective, Encoding, Style) { + assert(Tok.is(TT_BlockComment) && + "block comment section must start with a block comment"); + + StringRef TokenText(Tok.TokenText); assert(TokenText.startswith("/*") && TokenText.endswith("*/")); TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); int IndentDelta = StartColumn - OriginalStartColumn; - LeadingWhitespace.resize(Lines.size()); - StartOfLineColumn.resize(Lines.size()); - StartOfLineColumn[0] = StartColumn + 2; + Content.resize(Lines.size()); + Content[0] = Lines[0]; + ContentColumn.resize(Lines.size()); + ContentColumn[0] = StartColumn + 2; + Tokens.resize(Lines.size()); for (size_t i = 1; i < Lines.size(); ++i) adjustWhitespace(i, IndentDelta); @@ -262,19 +319,20 @@ } for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { // If the last line is empty, the closing "*/" will have a star. 
- if (i + 1 == e && Lines[i].empty()) + if (i + 1 == e && Content[i].empty()) break; - if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i])) + if (!Content[i].empty() && i + 1 != e && + Decoration.startswith(Content[i])) continue; - while (!Lines[i].startswith(Decoration)) + while (!Content[i].startswith(Decoration)) Decoration = Decoration.substr(0, Decoration.size() - 1); } LastLineNeedsDecoration = true; - IndentAtLineBreak = StartOfLineColumn[0] + 1; - for (size_t i = 1; i < Lines.size(); ++i) { - if (Lines[i].empty()) { - if (i + 1 == Lines.size()) { + IndentAtLineBreak = ContentColumn[0] + 1; + for (size_t i = 1, e = Lines.size(); i < e; ++i) { + if (Content[i].empty()) { + if (i + 1 == e) { // Empty last line means that we already have a star as a part of the // trailing */. We also need to preserve whitespace, so that */ is // correctly indented. @@ -282,7 +340,7 @@ } else if (Decoration.empty()) { // For all other lines, set the start column to 0 if they're empty, so // we do not insert trailing whitespace anywhere. - StartOfLineColumn[i] = 0; + ContentColumn[i] = 0; } continue; } @@ -290,21 +348,23 @@ // The first line already excludes the star. // For all other lines, adjust the line to exclude the star and // (optionally) the first whitespace. - unsigned DecorationSize = - Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size(); - StartOfLineColumn[i] += DecorationSize; - Lines[i] = Lines[i].substr(DecorationSize); - LeadingWhitespace[i] += DecorationSize; - if (!Decoration.startswith(Lines[i])) + unsigned DecorationSize = Decoration.startswith(Content[i]) + ? 
Content[i].size() + : Decoration.size(); + ContentColumn[i] += DecorationSize; + Content[i] = Content[i].substr(DecorationSize); + if (!Decoration.startswith(Content[i])) IndentAtLineBreak = - std::min(IndentAtLineBreak, std::max(0, StartOfLineColumn[i])); + std::min(IndentAtLineBreak, std::max(0, ContentColumn[i])); } - IndentAtLineBreak = std::max(IndentAtLineBreak, Decoration.size()); + IndentAtLineBreak = + std::max(IndentAtLineBreak, Decoration.size()); + DEBUG({ llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; for (size_t i = 0; i < Lines.size(); ++i) { - llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] - << "\n"; + llvm::dbgs() << i << " |" << Content[i] << "| " + << (Content[i].data() - Lines[i].data()) << "\n"; } }); } @@ -334,78 +394,142 @@ StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); // Adjust Lines to only contain relevant text. - Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); - Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); - // Adjust LeadingWhitespace to account all whitespace between the lines - // to the current line. - LeadingWhitespace[LineIndex] = - Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); + size_t PreviousContentOffset = + Content[LineIndex - 1].data() - Lines[LineIndex - 1].data(); + Content[LineIndex - 1] = Lines[LineIndex - 1].substr( + PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset); + Content[LineIndex] = Lines[LineIndex].substr(StartOfLine); // Adjust the start column uniformly across all lines. 
- StartOfLineColumn[LineIndex] = + ContentColumn[LineIndex] = encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + IndentDelta; } -unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } - unsigned BreakableBlockComment::getLineLengthAfterSplit( - unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { - unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); - return ContentStartColumn + - encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), - ContentStartColumn, Style.TabWidth, - Encoding) + - // The last line gets a "*/" postfix. - (LineIndex + 1 == Lines.size() ? 2 : 0); -} - -BreakableToken::Split -BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit) const { - return getCommentSplit(Lines[LineIndex].substr(TailOffset), - getContentStartColumn(LineIndex, TailOffset), - ColumnLimit, Style.TabWidth, Encoding); + unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const { + unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); + unsigned LineLength = + ContentStartColumn + encoding::columnWidthWithTabs( + Content[LineIndex].substr(TailOffset, Length), + ContentStartColumn, Style.TabWidth, Encoding); + // The last line gets a "*/" postfix. + if (LineIndex + 1 == Lines.size()) { + LineLength += 2; + // We never need a decoration when breaking just the trailing "*/" postfix. + // Note that checking that Length == 0 is not enough, since Length could + // also be StringRef::npos. 
+ if (Content[LineIndex].substr(TailOffset, Length).empty()) { + LineLength -= Decoration.size(); + } + } + return LineLength; } void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) { - StringRef Text = Lines[LineIndex].substr(TailOffset); + StringRef Text = Content[LineIndex].substr(TailOffset); StringRef Prefix = Decoration; + unsigned LocalIndentAtLineBreak = IndentAtLineBreak; if (LineIndex + 1 == Lines.size() && Text.size() == Split.first + Split.second) { // For the last line we need to break before "*/", but not to add "* ". Prefix = ""; + if (LocalIndentAtLineBreak >= 2) + LocalIndentAtLineBreak -= 2; } - unsigned BreakOffsetInToken = - Text.data() - Tok.TokenText.data() + Split.first; + Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; unsigned CharsToRemove = Split.second; - assert(IndentAtLineBreak >= Decoration.size()); + assert(LocalIndentAtLineBreak >= Prefix.size()); Whitespaces.replaceWhitespaceInToken( - Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1, - IndentLevel, IndentAtLineBreak - Decoration.size()); + tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix, + InPPDirective, /*Newlines=*/1, IndentLevel, + /*Spaces=*/LocalIndentAtLineBreak - Prefix.size()); } -void BreakableBlockComment::replaceWhitespace(unsigned LineIndex, - unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { - StringRef Text = Lines[LineIndex].substr(TailOffset); - unsigned BreakOffsetInToken = - Text.data() - Tok.TokenText.data() + Split.first; - unsigned CharsToRemove = Split.second; - Whitespaces.replaceWhitespaceInToken( - Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false, - /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); +BreakableToken::Split BreakableBlockComment::getSplitBefore( + unsigned LineIndex, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + bool ReflowInProgress) const { + if 
(!mayReflow(LineIndex, ReflowInProgress)) return Split(StringRef::npos, 0); + StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); + return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn, + ColumnLimit); } +unsigned BreakableBlockComment::getReflownColumn( + StringRef Content, + unsigned LineIndex, + unsigned PreviousEndColumn) const { + // The whole line gets reflown, need to update the column limit. + unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); + // If this is the last line, it will carry around its '*/' postfix. + unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0); + unsigned ReflownColumn = + StartColumn + encoding::columnWidthWithTabs(Content, StartColumn, + Style.TabWidth, Encoding) + + PostfixLength; + return ReflownColumn; +} + +unsigned BreakableBlockComment::getLineLengthAfterSplitBefore( + unsigned LineIndex, unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const { + if (SplitBefore.first == StringRef::npos || + SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { + // A piece of line gets reflown. + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } else { + // The whole line gets reflown, need to check if we need to insert a break + // for the postfix or not. 
+ StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); + unsigned ReflownColumn = + getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); + if (ReflownColumn <= ColumnLimit) { + return ReflownColumn; + } + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } +} void BreakableBlockComment::replaceWhitespaceBefore( - unsigned LineIndex, WhitespaceManager &Whitespaces) { - if (LineIndex == 0) + unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, + Split SplitBefore, WhitespaceManager &Whitespaces) { + if (LineIndex == 0) return; + StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); + if (SplitBefore.first != StringRef::npos) { + assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && + "Reflowing whitespace within a token"); + unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + + Content[LineIndex - 1].size() - + tokenAt(LineIndex).TokenText.data(); + unsigned WhitespaceLength = TrimmedContent.data() - + tokenAt(LineIndex).TokenText.data() - + WhitespaceOffsetInToken; + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), WhitespaceOffsetInToken, + /*ReplaceChars=*/WhitespaceLength, + /*PreviousPostfix=*/"", + /*CurrentPrefix=*/ReflowPrefix, InPPDirective, + /*Newlines=*/0, IndentLevel, /*Spaces=*/0); + Split BreakSplit = SplitBefore; + BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data(); + unsigned ReflownColumn = + getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); + if (ReflownColumn > ColumnLimit) { + insertBreak(LineIndex, 0, BreakSplit, Whitespaces); + } return; + } + + // Fix the decoration of the line at LineIndex. 
StringRef Prefix = Decoration; - if (Lines[LineIndex].empty()) { + if (Content[LineIndex].empty()) { if (LineIndex + 1 == Lines.size()) { if (!LastLineNeedsDecoration) { // If the last line was empty, we don't need a prefix, as the */ will @@ -418,19 +542,21 @@ Prefix = Prefix.substr(0, 1); } } else { - if (StartOfLineColumn[LineIndex] == 1) { + if (ContentColumn[LineIndex] == 1) { // This line starts immediately after the decorating *. Prefix = Prefix.substr(0, 1); } } - - unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - - Tok.TokenText.data() - - LeadingWhitespace[LineIndex]; + unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + + Content[LineIndex - 1].size() - + tokenAt(LineIndex).TokenText.data(); + unsigned WhitespaceLength = Content[LineIndex].data() - + tokenAt(LineIndex).TokenText.data() - + WhitespaceOffsetInToken; Whitespaces.replaceWhitespaceInToken( - Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, - InPPDirective, 1, IndentLevel, - StartOfLineColumn[LineIndex] - Prefix.size()); + tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", + Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, + ContentColumn[LineIndex] - Prefix.size()); } unsigned @@ -439,7 +565,203 @@ // If we break, we always break at the predefined indent. 
if (TailOffset != 0) return IndentAtLineBreak; - return std::max(0, StartOfLineColumn[LineIndex]); + return std::max(0, ContentColumn[LineIndex]); +} + +BreakableLineCommentSection::BreakableLineCommentSection( + const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, + unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableComment(Token, IndentLevel, StartColumn, OriginalStartColumn, + FirstInLine, InPPDirective, Encoding, Style) { + assert(Tok.is(TT_LineComment) && + "line comment section must start with a line comment"); + FormatToken *LineTok = nullptr; + for (const FormatToken *CurrentTok = &Tok; + CurrentTok && CurrentTok->is(TT_LineComment); + CurrentTok = CurrentTok->Next) { + LastLineTok = LineTok; + StringRef TokenText(CurrentTok->TokenText); + assert(TokenText.startswith("//")); + size_t FirstLineIndex = Lines.size(); + TokenText.split(Lines, "\n"); + Content.resize(Lines.size()); + ContentColumn.resize(Lines.size()); + OriginalContentColumn.resize(Lines.size()); + Tokens.resize(Lines.size()); + Prefix.resize(Lines.size()); + OriginalPrefix.resize(Lines.size()); + for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) { + StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i]); + OriginalPrefix[i] = Prefix[i] = IndentPrefix; + if (Lines[i].size() > Prefix[i].size() && + isAlphanumeric(Lines[i][Prefix[i].size()])) { + if (Prefix[i] == "//") + Prefix[i] = "// "; + else if (Prefix[i] == "///") + Prefix[i] = "/// "; + else if (Prefix[i] == "//!") + Prefix[i] = "//! 
"; + } + + Tokens[i] = LineTok; + Content[i] = Lines[i].substr(IndentPrefix.size()); + OriginalContentColumn[i] = + StartColumn + + encoding::columnWidthWithTabs(OriginalPrefix[i], + StartColumn, + Style.TabWidth, + Encoding); + ContentColumn[i] = + StartColumn + + encoding::columnWidthWithTabs(Prefix[i], + StartColumn, + Style.TabWidth, + Encoding); + + // Calculate the end of the non-whitespace text in this line. + size_t EndOfLine = Content[i].find_last_not_of(Blanks); + if (EndOfLine == StringRef::npos) + EndOfLine = Content[i].size(); + else + ++EndOfLine; + Content[i] = Content[i].substr(0, EndOfLine); + } + LineTok = CurrentTok->Next; + } +} + +unsigned BreakableLineCommentSection::getLineLengthAfterSplit( + unsigned LineIndex, unsigned TailOffset, + StringRef::size_type Length) const { + unsigned ContentStartColumn = + (TailOffset == 0 ? ContentColumn[LineIndex] + : OriginalContentColumn[LineIndex]); + return ContentStartColumn + encoding::columnWidthWithTabs( + Content[LineIndex].substr(TailOffset, Length), + ContentStartColumn, Style.TabWidth, Encoding); +} + +void BreakableLineCommentSection::insertBreak(unsigned LineIndex, + unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) { + StringRef Text = Content[LineIndex].substr(TailOffset); + unsigned BreakOffsetInToken = + Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; + unsigned CharsToRemove = Split.second; + unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] + + Prefix[LineIndex].size() - + OriginalPrefix[LineIndex].size(); + assert(IndentAtLineBreak >= Prefix[LineIndex].size()); + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", + Prefix[LineIndex], InPPDirective, /*Newlines=*/1, IndentLevel, + /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size()); +} + +BreakableComment::Split BreakableLineCommentSection::getSplitBefore( + unsigned LineIndex, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + bool 
ReflowInProgress) const { + if (!mayReflow(LineIndex, ReflowInProgress)) return Split(StringRef::npos, 0); + return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn, + ColumnLimit); +} + +unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore( + unsigned LineIndex, unsigned TailOffset, + unsigned PreviousEndColumn, + unsigned ColumnLimit, + Split SplitBefore) const { + if (SplitBefore.first == StringRef::npos || + SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { + // A piece of line gets reflown. + return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + } else { + // The whole line gets reflown, need to update the column limit. + unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); + return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex], + StartColumn, + Style.TabWidth, + Encoding); + } +} + +void BreakableLineCommentSection::replaceWhitespaceBefore( + unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, + Split SplitBefore, WhitespaceManager &Whitespaces) { + // If this is the first line of a token, we need to inform Whitespace Manager + // about it. + if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { + if (SplitBefore.first != StringRef::npos) { + // Replace the whitespace between the tokens. + Whitespaces.replaceWhitespace(*Tokens[LineIndex], + /*Newlines=*/0, + /*IndentLevel=*/IndentLevel, + /*Spaces=*/0, + /*StartOfTokenColumn=*/StartColumn, + /*InPPDirective=*/false); + // Replace the prefix of the token with the reflow prefix. 
+ unsigned WhitespaceLength = + Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); + Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], + /*Offset=*/0, + /*ReplaceChars=*/WhitespaceLength, + /*PreviousPostfix=*/"", + /*CurrentPrefix=*/ReflowPrefix, + /*InPPDirective=*/false, + /*Newlines=*/0, + /*IndentLevel=*/IndentLevel, + /*Spaces=*/0); + } else { + // This is the first line for the current token. + // Adjust the start column if necessary. + unsigned LineColumn = + ContentColumn[LineIndex] - + (Content[LineIndex].data() - Lines[LineIndex].data()); + if (tokenAt(LineIndex).OriginalColumn != LineColumn) { + Whitespaces.replaceWhitespace(*Tokens[LineIndex], + /*Newlines=*/1, + /*IndentLevel=*/IndentLevel, + /*Spaces=*/LineColumn, + /*StartOfTokenColumn=*/LineColumn, + /*InPPDirective=*/false); + } else { + Whitespaces.addUntouchableToken(tokenAt(LineIndex), + /*InPPDirective=*/false); + } + } + } else if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) { + // Take care of the space possibly introduced after a decoration. 
+ assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() && + "Expecting a block comment decoration to differ from original by " + "at most a space"); + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "", + /*InPPDirective=*/false, + /*Newlines=*/0, /*IndentLevel=*/0, + /*Spaces=*/1); + } + if (SplitBefore.first != StringRef::npos && + SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { + insertBreak(LineIndex, 0, SplitBefore, Whitespaces); + } +} + +void BreakableLineCommentSection::updateNextToken(LineState& State) const { + if (LastLineTok) { + State.NextToken = LastLineTok->Next; + } +} + +unsigned +BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, + unsigned TailOffset) const { + if (TailOffset != 0) { + return OriginalContentColumn[LineIndex]; + } + return ContentColumn[LineIndex]; } } // namespace format Index: lib/Format/CMakeLists.txt =================================================================== --- lib/Format/CMakeLists.txt +++ lib/Format/CMakeLists.txt @@ -3,7 +3,6 @@ add_clang_library(clangFormat AffectedRangeManager.cpp BreakableToken.cpp - Comments.cpp ContinuationIndenter.cpp Format.cpp FormatToken.cpp Index: lib/Format/Comments.h =================================================================== --- lib/Format/Comments.h +++ /dev/null @@ -1,33 +0,0 @@ -//===--- Comments.cpp - Comment manipulation -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief Declares comment manipulation functionality. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_COMMENTS_H -#define LLVM_CLANG_LIB_FORMAT_COMMENTS_H - -#include "clang/Basic/LLVM.h" -#include "llvm/ADT/StringRef.h" - -namespace clang { -namespace format { - -/// \brief Returns the comment prefix of the line comment \p Comment. -/// -/// The comment prefix consists of a leading known prefix, like "//" or "///", -/// together with the following whitespace. -StringRef getLineCommentIndentPrefix(StringRef Comment); - -} // namespace format -} // namespace clang - -#endif Index: lib/Format/Comments.cpp =================================================================== --- lib/Format/Comments.cpp +++ /dev/null @@ -1,36 +0,0 @@ -//===--- Comments.cpp - Comment Manipulation -------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief Implements comment manipulation. 
-/// -//===----------------------------------------------------------------------===// - -#include "Comments.h" - -namespace clang { -namespace format { - -StringRef getLineCommentIndentPrefix(StringRef Comment) { - static const char *const KnownPrefixes[] = {"///", "//", "//!"}; - StringRef LongestPrefix; - for (StringRef KnownPrefix : KnownPrefixes) { - if (Comment.startswith(KnownPrefix)) { - size_t PrefixLength = KnownPrefix.size(); - while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') - ++PrefixLength; - if (PrefixLength > LongestPrefix.size()) - LongestPrefix = Comment.substr(0, PrefixLength); - } - } - return LongestPrefix; -} - -} // namespace format -} // namespace clang Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -20,7 +20,7 @@ #include "clang/Format/Format.h" #include "llvm/Support/Debug.h" -#define DEBUG_TYPE "format-formatter" +#define DEBUG_TYPE "format-indenter" namespace clang { namespace format { @@ -1154,7 +1154,9 @@ } } else if (Current.is(TT_BlockComment)) { if (!Current.isTrailingComment() || !Style.ReflowComments || - CommentPragmasRegex.match(Current.TokenText.substr(2))) + CommentPragmasRegex.match(Current.TokenText.substr(2)) || + Current.TokenText.substr(2).ltrim().startswith("clang-format on") || + Current.TokenText.substr(2).ltrim().startswith("clang-format off")) return addMultilineToken(Current, State); Token.reset(new BreakableBlockComment( Current, State.Line->Level, StartColumn, Current.OriginalColumn, @@ -1163,11 +1165,14 @@ (Current.Previous == nullptr || Current.Previous->isNot(TT_ImplicitStringLiteral))) { if (!Style.ReflowComments || - CommentPragmasRegex.match(Current.TokenText.substr(2))) + CommentPragmasRegex.match(Current.TokenText.substr(2)) || + Current.TokenText.substr(2).ltrim().startswith("clang-format on") || + 
Current.TokenText.substr(2).ltrim().startswith("clang-format off")) return 0; - Token.reset(new BreakableLineComment(Current, State.Line->Level, - StartColumn, /*InPPDirective=*/false, - Encoding, Style)); + Token.reset(new BreakableLineCommentSection( + Current, State.Line->Level, StartColumn, Current.OriginalColumn, + !Current.Previous, + /*InPPDirective=*/false, Encoding, Style)); // We don't insert backslashes when breaking line comments. ColumnLimit = Style.ColumnLimit; } else { @@ -1178,15 +1183,25 @@ unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; bool BreakInserted = false; + bool ReflowInProgress = false; unsigned Penalty = 0; unsigned RemainingTokenColumns = 0; for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); LineIndex != EndIndex; ++LineIndex) { - if (!DryRun) - Token->replaceWhitespaceBefore(LineIndex, Whitespaces); + BreakableToken::Split SplitBefore = Token->getSplitBefore( + LineIndex, RemainingTokenColumns, RemainingSpace, ReflowInProgress); unsigned TailOffset = 0; - RemainingTokenColumns = - Token->getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + if (SplitBefore.first != StringRef::npos) { + TailOffset = SplitBefore.first + SplitBefore.second; + ReflowInProgress = true; + } else { + ReflowInProgress = false; + } + if (!DryRun) + Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, + RemainingSpace, SplitBefore, Whitespaces); + RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( + LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore); while (RemainingTokenColumns > RemainingSpace) { BreakableToken::Split Split = Token->getSplit(LineIndex, TailOffset, ColumnLimit); @@ -1198,17 +1213,19 @@ break; } assert(Split.first != 0); - unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( - LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); // We can remove extra whitespace instead of breaking the line. 
if (RemainingTokenColumns + 1 - Split.second <= RemainingSpace) { - RemainingTokenColumns = 0; + RemainingTokenColumns = RemainingTokenColumns + 1 - Split.second; + ReflowInProgress = true; if (!DryRun) Token->replaceWhitespace(LineIndex, TailOffset, Split, Whitespaces); break; } + unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( + LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); + // When breaking before a tab character, it may be moved by a few columns, // but will still be expanded to the next tab stop, so we don't save any // columns. @@ -1226,6 +1243,7 @@ } TailOffset += Split.first + Split.second; RemainingTokenColumns = NewRemainingTokenColumns; + ReflowInProgress = true; BreakInserted = true; } } @@ -1246,6 +1264,9 @@ State.Stack.back().LastSpace = StartColumn; } + + Token->updateNextToken(State); + return Penalty; } Index: lib/Format/TokenAnnotator.cpp =================================================================== --- lib/Format/TokenAnnotator.cpp +++ lib/Format/TokenAnnotator.cpp @@ -1594,8 +1594,14 @@ for (SmallVectorImpl::reverse_iterator I = Lines.rbegin(), E = Lines.rend(); I != E; ++I) { - if (NextNonCommentLine && (*I)->First->is(tok::comment) && - (*I)->First->Next == nullptr) + bool CommentLine = (*I)->First; + for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) { + if (!Tok->is(tok::comment)) { + CommentLine = false; + break; + } + } + if (NextNonCommentLine && CommentLine) (*I)->Level = NextNonCommentLine->Level; else NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? 
(*I) : nullptr; Index: lib/Format/UnwrappedLineParser.cpp =================================================================== --- lib/Format/UnwrappedLineParser.cpp +++ lib/Format/UnwrappedLineParser.cpp @@ -1998,7 +1998,9 @@ for (std::list::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); I != E; ++I) { - llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] "; + llvm::dbgs() << I->Tok->Tok.getName() << "[" + << "T=" << I->Tok->Type + << ", OC=" << I->Tok->OriginalColumn << "] "; } for (std::list::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); @@ -2038,13 +2040,60 @@ FormatTok.NewlinesBefore > 0; } +static bool isLineComment(const FormatToken &FormatTok) { + return FormatTok.is(tok::comment) && + FormatTok.TokenText.startswith("//"); +} + +// Checks if \p FormatTok is a line comment that continues the line comment +// section on \p Line. +static bool continuesLineComment(const FormatToken &FormatTok, + const UnwrappedLine &Line) { + if (Line.Tokens.empty()) + return false; + const FormatToken &FirstLineTok = *Line.Tokens.front().Tok; + // If Line starts with a line comment, then FormatTok continues the comment + // section if its original column is greater than or equal to the original start + // column of the line. + // + // If Line starts with a different token, then FormatTok continues the + // comment section if its original column is greater than the original start + // column of the line. + // + // For example, the second line comment continues the first in these cases: + // // first line + // // second line + // and: + // // first line + // // second line + // and: + // int i; // first line + // // second line + // + // The second line comment doesn't continue the first in these cases: + // // first line + // // second line + // and: + // int i; // first line + // // second line + unsigned MinContinueColumn = + FirstLineTok.OriginalColumn + + ((isLineComment(FirstLineTok) && !FirstLineTok.Next) ? 
0 : 1); + return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && + isLineComment(*(Line.Tokens.back().Tok)) && + FormatTok.OriginalColumn >= MinContinueColumn; +} + void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { bool JustComments = Line->Tokens.empty(); for (SmallVectorImpl::const_iterator I = CommentsBeforeNextToken.begin(), E = CommentsBeforeNextToken.end(); I != E; ++I) { - if (isOnNewLine(**I) && JustComments) + // Line comments that belong to the same line comment section are put on the + // same line since later we might want to reflow content between them. + // See BreakableToken. + if (isOnNewLine(**I) && JustComments && !continuesLineComment(**I, *Line)) addUnwrappedLine(); pushToken(*I); } @@ -2110,7 +2159,8 @@ if (!FormatTok->Tok.is(tok::comment)) return; - if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) { + if (!continuesLineComment(*FormatTok, *Line) && + (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { CommentsInCurrentLine = false; } if (CommentsInCurrentLine) { Index: lib/Format/WhitespaceManager.cpp =================================================================== --- lib/Format/WhitespaceManager.cpp +++ lib/Format/WhitespaceManager.cpp @@ -127,7 +127,8 @@ Changes[i - 1].IsTrailingComment = (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof || (Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) && - Changes[i - 1].Kind == tok::comment; + Changes[i - 1].Kind == tok::comment && + OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd; } // FIXME: The last token is currently not always an eof token; in those // cases, setting TokenLength of the last token to 0 is wrong. 
Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -1783,6 +1783,455 @@ " 0x00, 0x00, 0x00, 0x00}; // comment\n"); } +TEST_F(FormatTest, ReflowsComments) { + // Break a long line and reflow with the full next line. + EXPECT_EQ("// long long long\n" + "// long long", + format("// long long long long\n" + "// long", + getLLVMStyleWithColumns(20))); + + // Keep the trailing newline while reflowing. + EXPECT_EQ("// long long long\n" + "// long long\n", + format("// long long long long\n" + "// long\n", + getLLVMStyleWithColumns(20))); + + // Break a long line and reflow with a part of the next line. + EXPECT_EQ("// long long long\n" + "// long long\n" + "// long_long", + format("// long long long long\n" + "// long long_long", + getLLVMStyleWithColumns(20))); + + // Break but do not reflow if the first word from the next line is too long. + EXPECT_EQ("// long long long\n" + "// long\n" + "// long_long_long\n", + format("// long long long long\n" + "// long_long_long\n", + getLLVMStyleWithColumns(20))); + + // Don't break or reflow short lines. + verifyFormat("// long\n" + "// long long long lo\n" + "// long long long lo\n" + "// long", + getLLVMStyleWithColumns(20)); + + // Keep prefixes and decorations while reflowing. + EXPECT_EQ("/// long long long\n" + "/// long long\n", + format("/// long long long long\n" + "/// long\n", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("//! long long long\n" + "//! long long\n", + format("//! long long long long\n" + "//! long\n", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("/* long long long\n" + " * long long */", + format("/* long long long long\n" + " * long */", + getLLVMStyleWithColumns(20))); + + // Don't bring leading whitespace up while reflowing. 
+ EXPECT_EQ("/* long long long\n" + " * long long long\n" + " */", + format("/* long long long long\n" + " * long long\n" + " */", + getLLVMStyleWithColumns(20))); + + // Reflow the last line of a block comment with its trailing '*/'. + EXPECT_EQ("/* long long long\n" + " long long */", + format("/* long long long long\n" + " long */", + getLLVMStyleWithColumns(20))); + + // Reflow two short lines; keep the postfix of the last one. + EXPECT_EQ("/* long long long\n" + " * long long long */", + format("/* long long long long\n" + " * long\n" + " * long */", + getLLVMStyleWithColumns(20))); + + // Put the postfix of the last short reflow line on a newline if it doesn't + // fit. + EXPECT_EQ("/* long long long\n" + " * long long longg\n" + " */", + format("/* long long long long\n" + " * long\n" + " * longg */", + getLLVMStyleWithColumns(20))); + + // Break single line block comments that are first in the line with ' *' + // decoration. + EXPECT_EQ("/* long long long\n" + " * long */", + format("/* long long long long */", getLLVMStyleWithColumns(20))); + + // Break single line block comments that are not first in the line with ' ' + // decoration. + EXPECT_EQ("int i; /* long long\n" + " long */", + format("int i; /* long long long */", getLLVMStyleWithColumns(20))); + + // Reflow a line that goes just over the column limit. + EXPECT_EQ("// long long long\n" + "// lon long", + format("// long long long lon\n" + "// long", + getLLVMStyleWithColumns(20))); + + // Stop reflowing if the next line has a different indentation than the + // previous line. + EXPECT_EQ("// long long long\n" + "// long\n" + "// long long\n" + "// long", + format("// long long long long\n" + "// long long\n" + "// long", + getLLVMStyleWithColumns(20))); + + // Reflow into the last part of a really long line that has been broken into + // multiple lines. 
+ EXPECT_EQ("// long long long\n" + "// long long long\n" + "// long long long\n", + format("// long long long long long long long long\n" + "// long\n", + getLLVMStyleWithColumns(20))); + + // Break the first line, then reflow the beginning of the second and third + // line up. + EXPECT_EQ("// long long long\n" + "// lon1 lon2 lon2\n" + "// lon2 lon3 lon3", + format("// long long long lon1\n" + "// lon2 lon2 lon2\n" + "// lon3 lon3", + getLLVMStyleWithColumns(20))); + + // Reflow the beginning of the second line, then break the rest. + EXPECT_EQ("// long long long\n" + "// lon1 lon2 lon2\n" + "// lon2 lon2 lon2\n" + "// lon3", + format("// long long long lon1\n" + "// lon2 lon2 lon2 lon2 lon2 lon3", + getLLVMStyleWithColumns(20))); + + // Shrink the first line, then reflow the second line up. + EXPECT_EQ("// long long long", format("// long long\n" + "// long", + getLLVMStyleWithColumns(20))); + + // Don't shrink leading whitespace. + EXPECT_EQ("int i; /// a", + format("int i; /// a", getLLVMStyleWithColumns(20))); + + // Shrink trailing whitespace if there is no postfix and reflow. + EXPECT_EQ("// long long long\n" + "// long long", + format("// long long long long \n" + "// long", + getLLVMStyleWithColumns(20))); + + // Shrink trailing whitespace to a single one if there is postfix. + EXPECT_EQ("/* long long long */", + format("/* long long long */", getLLVMStyleWithColumns(20))); + + // Break a block comment postfix if exceeding the line limit. + EXPECT_EQ("/* long\n" + " */", + format("/* long */", getLLVMStyleWithColumns(20))); + + // Reflow indented comments. + EXPECT_EQ("{\n" + " // long long long\n" + " // long long\n" + " int i; /* long lon\n" + " g long\n" + " */\n" + "}", + format("{\n" + " // long long long long\n" + " // long\n" + " int i; /* long lon g\n" + " long */\n" + "}", + getLLVMStyleWithColumns(20))); + + // Don't realign trailing comments after reflow has happened. 
+ EXPECT_EQ("// long long long\n" + "// long long\n" + "long i; // long", + format("// long long long long\n" + "// long\n" + "long i; // long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("// long long long\n" + "// longng long long\n" + "// long lo", + format("// long long long longng\n" + "// long long long\n" + "// lo", + getLLVMStyleWithColumns(20))); + + // Reflow lines after a broken line. + EXPECT_EQ("int a; // Trailing\n" + " // comment on\n" + " // 2 or 3\n" + " // lines.\n", + format("int a; // Trailing comment\n" + " // on 2\n" + " // or 3\n" + " // lines.\n", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("/// This long line\n" + "/// gets reflown.\n", + format("/// This long line gets\n" + "/// reflown.\n", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("//! This long line\n" + "//! gets reflown.\n", + format(" //! This long line gets\n" + " //! reflown.\n", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("/* This long line\n" + " * gets reflown.\n" + " */\n", + format("/* This long line gets\n" + " * reflown.\n" + " */\n", + getLLVMStyleWithColumns(20))); + + // Reflow after indentation makes a line too long. + EXPECT_EQ("{\n" + " // long long long\n" + " // lo long\n" + "}\n", + format("{\n" + "// long long long lo\n" + "// long\n" + "}\n", + getLLVMStyleWithColumns(20))); + + // Break and reflow multiple lines. + EXPECT_EQ("/*\n" + " * Reflow the end of\n" + " * line by 11 22 33\n" + " * 4.\n" + " */\n", + format("/*\n" + " * Reflow the end of line\n" + " * by\n" + " * 11\n" + " * 22\n" + " * 33\n" + " * 4.\n" + " */\n", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("/// First line gets\n" + "/// broken. Second\n" + "/// line gets\n" + "/// reflown and\n" + "/// broken. 
Third\n" + "/// gets reflown.\n", + format("/// First line gets broken.\n" + "/// Second line gets reflown and broken.\n" + "/// Third gets reflown.\n", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("int i; // first long\n" + " // long snd\n" + " // long.\n", + format("int i; // first long long\n" + " // snd long.\n", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("{\n" + " // first long line\n" + " // line second\n" + " // long line line\n" + " // third long line\n" + " // line\n" + "}\n", + format("{\n" + " // first long line line\n" + " // second long line line\n" + " // third long line line\n" + "}\n", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("int i; /* first line\n" + " * second\n" + " * line third\n" + " * line\n" + " */", + format("int i; /* first line\n" + " * second line\n" + " * third line\n" + " */", + getLLVMStyleWithColumns(20))); + + // Reflow the last two lines of a section that starts with a line having + // different indentation. + EXPECT_EQ( + "// long\n" + "// long long long\n" + "// long long", + format("// long\n" + "// long long long long\n" + "// long", + getLLVMStyleWithColumns(20))); + + // Keep the block comment ending '*/' while reflowing. + EXPECT_EQ("/* Long long long\n" + " * line short */\n", + format("/* Long long long line\n" + " * short */\n", + getLLVMStyleWithColumns(20))); + + // Don't reflow between separate blocks of comments. + EXPECT_EQ("/* First comment\n" + " * block will */\n" + "/* Snd\n" + " */\n", + format("/* First comment block\n" + " * will */\n" + "/* Snd\n" + " */\n", + getLLVMStyleWithColumns(20))); + + // Don't reflow across blank comment lines. 
+ EXPECT_EQ("int i; // This long\n" + " // line gets\n" + " // broken.\n" + " // \n" + " // keep.\n", + format("int i; // This long line gets broken.\n" + " // \n" + " // keep.\n", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("{\n" + " /// long long long\n" + " /// long long\n" + " ///\n" + " /// long\n" + "}", + format("{\n" + " /// long long long long\n" + " /// long\n" + " ///\n" + " /// long\n" + "}", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("//! long long long\n" + "//! long\n" + "\n" + "//! long", + format("//! long long long long\n" + "\n" + "//! long", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("/* long long long\n" + " long\n" + "\n" + " long */", + format("/* long long long long\n" + "\n" + " long */", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("/* long long long\n" + " * long\n" + " *\n" + " * long */", + format("/* long long long long\n" + " *\n" + " * long */", + getLLVMStyleWithColumns(20))); + + // Don't reflow lines having content that is a single character. + EXPECT_EQ("// long long long\n" + "// long\n" + "// l", + format("// long long long long\n" + "// l", + getLLVMStyleWithColumns(20))); + + // Don't reflow lines starting with two punctuation characters. + EXPECT_EQ("// long long long\n" + "// long\n" + "// ... --- ...", + format( + "// long long long long\n" + "// ... --- ...", + getLLVMStyleWithColumns(20))); + + // Don't reflow between separate blocks of comments. + EXPECT_EQ("/* First comment\n" + " * block will */\n" + "/* Snd\n" + " */\n", + format("/* First comment block\n" + " * will */\n" + "/* Snd\n" + " */\n", + getLLVMStyleWithColumns(20))); + + // Don't reflow lines having different indentation. + EXPECT_EQ("// long long long\n" + "// long\n" + "// long", + format("// long long long long\n" + "// long", + getLLVMStyleWithColumns(20))); + + // Don't break or reflow after implicit string literals. 
+ verifyFormat("#include // l l l\n" + " // l", + getLLVMStyleWithColumns(20)); + + // Don't break or reflow comments on import lines. + EXPECT_EQ("#include \"t\" /* l l l\n" + " * l */", + format("#include \"t\" /* l l l\n" + " * l */", + getLLVMStyleWithColumns(20))); + + // Don't reflow between different trailing comment sections. + EXPECT_EQ("int i; // long long\n" + " // long\n" + "int j; // long long\n" + " // long\n", + format("int i; // long long long\n" + "int j; // long long long\n", + getLLVMStyleWithColumns(20))); + + // Don't reflow if the first word on the next line is longer than the + // available space at current line. + EXPECT_EQ("int i; // trigger\n" + " // reflow\n" + " // longsec\n", + format("int i; // trigger reflow\n" + " // longsec\n", + getLLVMStyleWithColumns(20))); + + // Keep empty comment lines. + EXPECT_EQ("/**/", format(" /**/", getLLVMStyleWithColumns(20))); + EXPECT_EQ("/* */", format(" /* */", getLLVMStyleWithColumns(20))); + EXPECT_EQ("/* */", format(" /* */", getLLVMStyleWithColumns(20))); + EXPECT_EQ("//", format(" // ", getLLVMStyleWithColumns(20))); + EXPECT_EQ("///", format(" /// ", getLLVMStyleWithColumns(20))); +} + TEST_F(FormatTest, IgnoresIf0Contents) { EXPECT_EQ("#if 0\n" "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n" @@ -7238,11 +7687,9 @@ format("a = {1111 /* */\n" "};", getLLVMStyleWithColumns(15))); - - // FIXME: The formatting is still wrong here. EXPECT_EQ("a = {\n" " 1111 /* a\n" - " */\n" + " */\n" "};", format("a = {1111 /* a */\n" "};", @@ -10933,6 +11380,26 @@ " int j;\n" " /* clang-format on */\n" " int k;")); + + // Don't reflow comments within disabled regions. 
+ EXPECT_EQ( + "// clang-format off\n" + "// long long long long long long line\n" + "/* clang-format on */\n" + "/* long long long\n" + " * long long long\n" + " * line */\n" + "int i;\n" + "/* clang-format off */\n" + "/* long long long long long long line */\n", + format("// clang-format off\n" + "// long long long long long long line\n" + "/* clang-format on */\n" + "/* long long long long long long line */\n" + "int i;\n" + "/* clang-format off */\n" + "/* long long long long long long line */\n", + getLLVMStyleWithColumns(20))); } TEST_F(FormatTest, DoNotCrashOnInvalidInput) { @@ -11102,18 +11569,6 @@ EXPECT_TRUE(static_cast(Result)); EXPECT_EQ(Expected, *Result); } - -TEST_F(FormatTest, AllignTrailingComments) { - EXPECT_EQ("#define MACRO(V) \\\n" - " V(Rt2) /* one more char */ \\\n" - " V(Rs) /* than here */ \\\n" - "/* comment 3 */\n", - format("#define MACRO(V)\\\n" - "V(Rt2) /* one more char */ \\\n" - "V(Rs) /* than here */ \\\n" - "/* comment 3 */ \\\n", - getLLVMStyleWithColumns(40))); -} } // end namespace } // end namespace format } // end namespace clang Index: unittests/Format/FormatTestSelective.cpp =================================================================== --- unittests/Format/FormatTestSelective.cpp +++ unittests/Format/FormatTestSelective.cpp @@ -111,13 +111,19 @@ format("int a; // comment\n" "int b; // comment", 0, 0)); - EXPECT_EQ("int a; // comment\n" - " // line 2\n" + EXPECT_EQ("int a; // comment\n" + " // line 2\n" "int b;", format("int a; // comment\n" " // line 2\n" "int b;", 28, 0)); + EXPECT_EQ("int a; // comment\n" + "// comment 2\n" + "int b;", + format("int a; // comment\n" + "// comment 2\n" + "int b;", 28, 0)); EXPECT_EQ("int aaaaaa; // comment\n" "int b;\n" "int c; // unrelated comment",