Index: lib/Format/BreakableToken.h =================================================================== --- lib/Format/BreakableToken.h +++ lib/Format/BreakableToken.h @@ -21,6 +21,7 @@ #include "Encoding.h" #include "TokenAnnotator.h" #include "WhitespaceManager.h" +#include "llvm/Support/Regex.h" #include namespace clang { @@ -118,7 +119,8 @@ /// needs to be reformatted before any breaks are made. virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit) const { + unsigned ColumnLimit, + llvm::Regex& CommentPragmasRegex) const { return Split(StringRef::npos, 0); } @@ -238,7 +240,8 @@ // Checks if the content of line LineIndex may be reflown with the previous // line. - bool mayReflow(unsigned LineIndex) const; + virtual bool mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const = 0; // Contains the original text of the lines of the block comment. // @@ -307,7 +310,8 @@ void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit) const override; + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const override; unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn, @@ -317,6 +321,8 @@ unsigned ColumnLimit, Split SplitBefore, WhitespaceManager &Whitespaces) override; + bool mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const override; private: // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex]. @@ -371,7 +377,8 @@ void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) override; Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit) const override; + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const override; unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn, unsigned ColumnLimit, @@ -380,6 +387,8 @@ unsigned ColumnLimit, Split SplitBefore, WhitespaceManager &Whitespaces) override; void updateNextToken(LineState& State) const override; + bool mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const override; private: unsigned getContentStartColumn(unsigned LineIndex, Index: lib/Format/BreakableToken.cpp =================================================================== --- lib/Format/BreakableToken.cpp +++ lib/Format/BreakableToken.cpp @@ -321,13 +321,6 @@ (!isPunctuation(Content[0]) || !isPunctuation(Content[1])); } -bool BreakableComment::mayReflow(unsigned LineIndex) const { - return LineIndex > 0 && mayReflowContent(Content[LineIndex]) && - !Tok.Finalized && !switchesFormatting(tokenAt(LineIndex)) && - (!Tok.is(TT_LineComment) || - OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]); -} - BreakableBlockComment::BreakableBlockComment( const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, @@ -501,8 +494,9 @@ BreakableToken::Split BreakableBlockComment::getSplitBefore( unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit) const { - if (!mayReflow(LineIndex)) + unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { + if (!mayReflow(LineIndex, CommentPragmasRegex)) return Split(StringRef::npos, 0); StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn, @@ -622,6 +616,19 @@ InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size()); } +bool BreakableBlockComment::mayReflow(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const { + // Content[LineIndex] may exclude the indent after the '*' decoration. In that + // case, we compute the start of the comment pragma manually. + StringRef IndentContent = Content[LineIndex]; + if (Lines[LineIndex].ltrim(Blanks).startswith("*")) { + IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1); + } + return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && + mayReflowContent(Content[LineIndex]) && !Tok.Finalized && + !switchesFormatting(tokenAt(LineIndex)); +} + unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const { @@ -748,10 +755,10 @@ } BreakableComment::Split BreakableLineCommentSection::getSplitBefore( - unsigned LineIndex, - unsigned PreviousEndColumn, - unsigned ColumnLimit) const { - if (!mayReflow(LineIndex)) return Split(StringRef::npos, 0); + unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, + llvm::Regex &CommentPragmasRegex) const { + if (!mayReflow(LineIndex, CommentPragmasRegex)) + return Split(StringRef::npos, 0); return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn, ColumnLimit); } @@ -850,6 +857,20 @@ } } +bool BreakableLineCommentSection::mayReflow( + unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { + // Line comments have the indent as part of the prefix, so we need to + // recompute the start of the line. + StringRef IndentContent = Content[LineIndex]; + if (Lines[LineIndex].startswith("//")) { + IndentContent = Lines[LineIndex].substr(2); + } + return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && + mayReflowContent(Content[LineIndex]) && !Tok.Finalized && + !switchesFormatting(tokenAt(LineIndex)) && + OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]; +} + unsigned BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const { Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -1213,7 +1213,7 @@ BreakableToken::Split SplitBefore(StringRef::npos, 0); if (ReflowInProgress) { SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns, - RemainingSpace); + RemainingSpace, CommentPragmasRegex); } ReflowInProgress = SplitBefore.first != StringRef::npos; unsigned TailOffset = Index: lib/Format/UnwrappedLineParser.h =================================================================== --- lib/Format/UnwrappedLineParser.h +++ lib/Format/UnwrappedLineParser.h @@ -19,6 +19,7 @@ #include "FormatToken.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Format/Format.h" +#include "llvm/Support/Regex.h" #include #include @@ -161,6 +162,8 @@ const FormatStyle &Style; const AdditionalKeywords &Keywords; + + llvm::Regex CommentPragmasRegex; FormatTokenSource *Tokens; UnwrappedLineConsumer &Callback; Index: lib/Format/UnwrappedLineParser.cpp =================================================================== --- lib/Format/UnwrappedLineParser.cpp +++ lib/Format/UnwrappedLineParser.cpp @@ -202,7 +202,8 @@ ArrayRef Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), - CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr), + CurrentLines(&Lines), Style(Style), Keywords(Keywords), + CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} void UnwrappedLineParser::reset() { @@ -2048,10 +2049,18 @@ // Checks if \p FormatTok is a line comment that continues the line comment // section on \p Line. static bool continuesLineComment(const FormatToken &FormatTok, - const UnwrappedLine &Line) { + const UnwrappedLine &Line, + llvm::Regex &CommentPragmasRegex) { if (Line.Tokens.empty()) return false; + StringRef IndentContent = FormatTok.TokenText; + if (FormatTok.TokenText.startswith("//") || + FormatTok.TokenText.startswith("/*")) + IndentContent = FormatTok.TokenText.substr(2); + if (CommentPragmasRegex.match(IndentContent)) + return false; + // If Line starts with a line comment, then FormatTok continues the comment // section if its original column is greater or equal to the original start // column of the line. @@ -2066,31 +2075,58 @@ // original start column of the min column token of the line. // // For example, the second line comment continues the first in these cases: + // // // first line // // second line + // // and: + // // // first line // // second line + // // and: + // // int i; // first line // // second line + // // and: + // // do { // first line // // second line // int i; // } while (true); // + // and: + // + // enum { + // a, // first line + // // second line + // b + // }; + // // The second line comment doesn't continue the first in these cases: + // // // first line // // second line + // // and: + // // int i; // first line // // second line + // // and: + // // do { // first line // // second line // int i; // } while (true); + // + // and: + // + // enum { + // a, // first line + // // second line + // }; const FormatToken *MinColumnToken = Line.Tokens.front().Tok; // Scan for '{//'. If found, use the column of '{' as a min column for line @@ -2103,6 +2139,11 @@ break; } PreviousToken = Node.Tok; + + // Grab the last newline preceding a token in this unwrapped line. + if (Node.Tok->NewlinesBefore > 0) { + MinColumnToken = Node.Tok; + } } if (PreviousToken && PreviousToken->is(tok::l_brace)) { MinColumnToken = PreviousToken; @@ -2130,7 +2171,9 @@ // // FIXME: Consider putting separate line comment sections as children to the // unwrapped line instead. - if (isOnNewLine(**I) && JustComments && !continuesLineComment(**I, *Line)) + (*I)->ContinuesLineCommentSection = + continuesLineComment(**I, *Line, CommentPragmasRegex); + if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) addUnwrappedLine(); pushToken(*I); } @@ -2196,7 +2239,9 @@ if (!FormatTok->Tok.is(tok::comment)) return; - if (!continuesLineComment(*FormatTok, *Line) && + FormatTok->ContinuesLineCommentSection = + continuesLineComment(*FormatTok, *Line, CommentPragmasRegex); + if (!FormatTok->ContinuesLineCommentSection && (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { CommentsInCurrentLine = false; } Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -2331,6 +2331,22 @@ "// XXX: long", getLLVMStyleWithColumns(20))); + // Don't reflow comment pragmas. + EXPECT_EQ("// long long long\n" + "// long\n" + "// IWYU pragma:", + format("// long long long long\n" + "// IWYU pragma:", + getLLVMStyleWithColumns(20))); + EXPECT_EQ("/* long long long\n" + " * long\n" + " * IWYU pragma:\n" + " */", + format("/* long long long long\n" + " * IWYU pragma:\n" + " */", + getLLVMStyleWithColumns(20))); + // Reflow lines that have a non-punctuation character among their first 2 // characters. EXPECT_EQ("// long long long\n"