Skip to content

Commit 9183422

Browse files
committedJan 25, 2017
[clang-format] Implement comment reflowing.
Summary: This presents a version of the comment reflowing with less mutable state inside the comment breakable token subclasses. The state has been pushed into the driving breakProtrudingToken method. For this, the API of BreakableToken is enriched by the methods getSplitBefore and getLineLengthAfterSplitBefore. Reviewers: klimek Reviewed By: klimek Subscribers: djasper, klimek, mgorny, cfe-commits, ioeric Differential Revision: https://reviews.llvm.org/D28764 llvm-svn: 293055
1 parent f242ffa commit 9183422

11 files changed

+1377
-294
lines changed
 

‎clang/lib/Format/BreakableToken.cpp

+510-122
Large diffs are not rendered by default.

‎clang/lib/Format/BreakableToken.h

+244-75
Large diffs are not rendered by default.

‎clang/lib/Format/CMakeLists.txt

-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ set(LLVM_LINK_COMPONENTS support)
33
add_clang_library(clangFormat
44
AffectedRangeManager.cpp
55
BreakableToken.cpp
6-
Comments.cpp
76
ContinuationIndenter.cpp
87
Format.cpp
98
FormatToken.cpp

‎clang/lib/Format/Comments.cpp

-36
This file was deleted.

‎clang/lib/Format/Comments.h

-33
This file was deleted.

‎clang/lib/Format/ContinuationIndenter.cpp

+44-16
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#include "clang/Format/Format.h"
2121
#include "llvm/Support/Debug.h"
2222

23-
#define DEBUG_TYPE "format-formatter"
23+
#define DEBUG_TYPE "format-indenter"
2424

2525
namespace clang {
2626
namespace format {
@@ -1154,7 +1154,11 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
11541154
}
11551155
} else if (Current.is(TT_BlockComment)) {
11561156
if (!Current.isTrailingComment() || !Style.ReflowComments ||
1157-
CommentPragmasRegex.match(Current.TokenText.substr(2)))
1157+
CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
1158+
// If a comment token switches formatting, like
1159+
// /* clang-format on */, we don't want to break it further,
1160+
// but we may still want to adjust its indentation.
1161+
switchesFormatting(Current))
11581162
return addMultilineToken(Current, State);
11591163
Token.reset(new BreakableBlockComment(
11601164
Current, State.Line->Level, StartColumn, Current.OriginalColumn,
@@ -1163,11 +1167,13 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
11631167
(Current.Previous == nullptr ||
11641168
Current.Previous->isNot(TT_ImplicitStringLiteral))) {
11651169
if (!Style.ReflowComments ||
1166-
CommentPragmasRegex.match(Current.TokenText.substr(2)))
1170+
CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
1171+
switchesFormatting(Current))
11671172
return 0;
1168-
Token.reset(new BreakableLineComment(Current, State.Line->Level,
1169-
StartColumn, /*InPPDirective=*/false,
1170-
Encoding, Style));
1173+
Token.reset(new BreakableLineCommentSection(
1174+
Current, State.Line->Level, StartColumn, Current.OriginalColumn,
1175+
!Current.Previous,
1176+
/*InPPDirective=*/false, Encoding, Style));
11711177
// We don't insert backslashes when breaking line comments.
11721178
ColumnLimit = Style.ColumnLimit;
11731179
} else {
@@ -1178,15 +1184,27 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
11781184

11791185
unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
11801186
bool BreakInserted = false;
1187+
// We use a conservative reflowing strategy. Reflow starts after a line is
1188+
// broken or the corresponding whitespace compressed. Reflow ends as soon as a
1189+
// line that doesn't get reflown with the previous line is reached.
1190+
bool ReflowInProgress = false;
11811191
unsigned Penalty = 0;
11821192
unsigned RemainingTokenColumns = 0;
11831193
for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
11841194
LineIndex != EndIndex; ++LineIndex) {
1195+
BreakableToken::Split SplitBefore(StringRef::npos, 0);
1196+
if (ReflowInProgress) {
1197+
SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
1198+
RemainingSpace);
1199+
}
1200+
ReflowInProgress = SplitBefore.first != StringRef::npos;
1201+
unsigned TailOffset =
1202+
ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;
11851203
if (!DryRun)
1186-
Token->replaceWhitespaceBefore(LineIndex, Whitespaces);
1187-
unsigned TailOffset = 0;
1188-
RemainingTokenColumns =
1189-
Token->getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
1204+
Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,
1205+
RemainingSpace, SplitBefore, Whitespaces);
1206+
RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(
1207+
LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);
11901208
while (RemainingTokenColumns > RemainingSpace) {
11911209
BreakableToken::Split Split =
11921210
Token->getSplit(LineIndex, TailOffset, ColumnLimit);
@@ -1198,17 +1216,23 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
11981216
break;
11991217
}
12001218
assert(Split.first != 0);
1201-
unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
1202-
LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
12031219

1204-
// We can remove extra whitespace instead of breaking the line.
1205-
if (RemainingTokenColumns + 1 - Split.second <= RemainingSpace) {
1206-
RemainingTokenColumns = 0;
1220+
// Check if compressing the whitespace range will bring the line length
1221+
// under the limit. If that is the case, we perform whitespace compression
1222+
// instead of inserting a line break.
1223+
unsigned RemainingTokenColumnsAfterCompression =
1224+
Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);
1225+
if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {
1226+
RemainingTokenColumns = RemainingTokenColumnsAfterCompression;
1227+
ReflowInProgress = true;
12071228
if (!DryRun)
1208-
Token->replaceWhitespace(LineIndex, TailOffset, Split, Whitespaces);
1229+
Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
12091230
break;
12101231
}
12111232

1233+
unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
1234+
LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
1235+
12121236
// When breaking before a tab character, it may be moved by a few columns,
12131237
// but will still be expanded to the next tab stop, so we don't save any
12141238
// columns.
@@ -1226,6 +1250,7 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
12261250
}
12271251
TailOffset += Split.first + Split.second;
12281252
RemainingTokenColumns = NewRemainingTokenColumns;
1253+
ReflowInProgress = true;
12291254
BreakInserted = true;
12301255
}
12311256
}
@@ -1246,6 +1271,9 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
12461271

12471272
State.Stack.back().LastSpace = StartColumn;
12481273
}
1274+
1275+
Token->updateNextToken(State);
1276+
12491277
return Penalty;
12501278
}
12511279

‎clang/lib/Format/TokenAnnotator.cpp

+8-2
Original file line numberDiff line numberDiff line change
@@ -1594,8 +1594,14 @@ void TokenAnnotator::setCommentLineLevels(
15941594
for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
15951595
E = Lines.rend();
15961596
I != E; ++I) {
1597-
if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
1598-
(*I)->First->Next == nullptr)
1597+
bool CommentLine = (*I)->First;
1598+
for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
1599+
if (!Tok->is(tok::comment)) {
1600+
CommentLine = false;
1601+
break;
1602+
}
1603+
}
1604+
if (NextNonCommentLine && CommentLine)
15991605
(*I)->Level = NextNonCommentLine->Level;
16001606
else
16011607
NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;

‎clang/lib/Format/UnwrappedLineParser.cpp

+53-3
Original file line numberDiff line numberDiff line change
@@ -1998,7 +1998,9 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
19981998
for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
19991999
E = Line.Tokens.end();
20002000
I != E; ++I) {
2001-
llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
2001+
llvm::dbgs() << I->Tok->Tok.getName() << "["
2002+
<< "T=" << I->Tok->Type
2003+
<< ", OC=" << I->Tok->OriginalColumn << "] ";
20022004
}
20032005
for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
20042006
E = Line.Tokens.end();
@@ -2038,13 +2040,60 @@ bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
20382040
FormatTok.NewlinesBefore > 0;
20392041
}
20402042

2043+
static bool isLineComment(const FormatToken &FormatTok) {
2044+
return FormatTok.is(tok::comment) &&
2045+
FormatTok.TokenText.startswith("//");
2046+
}
2047+
2048+
// Checks if \p FormatTok is a line comment that continues the line comment
2049+
// section on \p Line.
2050+
static bool continuesLineComment(const FormatToken &FormatTok,
2051+
const UnwrappedLine &Line) {
2052+
if (Line.Tokens.empty())
2053+
return false;
2054+
const FormatToken &FirstLineTok = *Line.Tokens.front().Tok;
2055+
// If Line starts with a line comment, then FormatTok continues the comment
2056+
// section if its original column is greater or equal to the original start
2057+
// column of the line.
2058+
//
2059+
// If Line starts with a a different token, then FormatTok continues the
2060+
// comment section if its original column greater than the original start
2061+
// column of the line.
2062+
//
2063+
// For example, the second line comment continues the first in these cases:
2064+
// // first line
2065+
// // second line
2066+
// and:
2067+
// // first line
2068+
// // second line
2069+
// and:
2070+
// int i; // first line
2071+
// // second line
2072+
//
2073+
// The second line comment doesn't continue the first in these cases:
2074+
// // first line
2075+
// // second line
2076+
// and:
2077+
// int i; // first line
2078+
// // second line
2079+
unsigned MinContinueColumn =
2080+
FirstLineTok.OriginalColumn +
2081+
((isLineComment(FirstLineTok) && !FirstLineTok.Next) ? 0 : 1);
2082+
return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
2083+
isLineComment(*(Line.Tokens.back().Tok)) &&
2084+
FormatTok.OriginalColumn >= MinContinueColumn;
2085+
}
2086+
20412087
void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
20422088
bool JustComments = Line->Tokens.empty();
20432089
for (SmallVectorImpl<FormatToken *>::const_iterator
20442090
I = CommentsBeforeNextToken.begin(),
20452091
E = CommentsBeforeNextToken.end();
20462092
I != E; ++I) {
2047-
if (isOnNewLine(**I) && JustComments)
2093+
// Line comments that belong to the same line comment section are put on the
2094+
// same line since later we might want to reflow content between them.
2095+
// See BreakableToken.
2096+
if (isOnNewLine(**I) && JustComments && !continuesLineComment(**I, *Line))
20482097
addUnwrappedLine();
20492098
pushToken(*I);
20502099
}
@@ -2110,7 +2159,8 @@ void UnwrappedLineParser::readToken() {
21102159

21112160
if (!FormatTok->Tok.is(tok::comment))
21122161
return;
2113-
if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
2162+
if (!continuesLineComment(*FormatTok, *Line) &&
2163+
(isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
21142164
CommentsInCurrentLine = false;
21152165
}
21162166
if (CommentsInCurrentLine) {

‎clang/lib/Format/WhitespaceManager.cpp

+32-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,38 @@ void WhitespaceManager::calculateLineBreakInformation() {
127127
Changes[i - 1].IsTrailingComment =
128128
(Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof ||
129129
(Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) &&
130-
Changes[i - 1].Kind == tok::comment;
130+
Changes[i - 1].Kind == tok::comment &&
131+
// FIXME: This is a dirty hack. The problem is that
132+
// BreakableLineCommentSection does comment reflow changes and here is
133+
// the aligning of trailing comments. Consider the case where we reflow
134+
// the second line up in this example:
135+
//
136+
// // line 1
137+
// // line 2
138+
//
139+
// That amounts to 2 changes by BreakableLineCommentSection:
140+
// - the first, delimited by (), for the whitespace between the tokens,
141+
// - and second, delimited by [], for the whitespace at the beginning
142+
// of the second token:
143+
//
144+
// // line 1(
145+
// )[// ]line 2
146+
//
147+
// So in the end we have two changes like this:
148+
//
149+
// // line1()[ ]line 2
150+
//
151+
// Note that the OriginalWhitespaceStart of the second change is the
152+
// same as the PreviousOriginalWhitespaceEnd of the first change.
153+
// In this case, the below check ensures that the second change doesn't
154+
// get treated as a trailing comment change here, since this might
155+
// trigger additional whitespace to be wrongly inserted before "line 2"
156+
// by the comment aligner here.
157+
//
158+
// For a proper solution we need a mechanism to say to WhitespaceManager
159+
// that a particular change breaks the current sequence of trailing
160+
// comments.
161+
OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd;
131162
}
132163
// FIXME: The last token is currently not always an eof token; in those
133164
// cases, setting TokenLength of the last token to 0 is wrong.

‎clang/unittests/Format/FormatTest.cpp

+478-3
Large diffs are not rendered by default.

‎clang/unittests/Format/FormatTestSelective.cpp

+8-2
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,19 @@ TEST_F(FormatTestSelective, FormatsCommentsLocally) {
111111
format("int a; // comment\n"
112112
"int b; // comment",
113113
0, 0));
114-
EXPECT_EQ("int a; // comment\n"
115-
" // line 2\n"
114+
EXPECT_EQ("int a; // comment\n"
115+
" // line 2\n"
116116
"int b;",
117117
format("int a; // comment\n"
118118
" // line 2\n"
119119
"int b;",
120120
28, 0));
121+
EXPECT_EQ("int a; // comment\n"
122+
"// comment 2\n"
123+
"int b;",
124+
format("int a; // comment\n"
125+
"// comment 2\n"
126+
"int b;", 28, 0));
121127
EXPECT_EQ("int aaaaaa; // comment\n"
122128
"int b;\n"
123129
"int c; // unrelated comment",

0 commit comments

Comments
 (0)
Please sign in to comment.