Index: lib/Format/WhitespaceManager.h =================================================================== --- lib/Format/WhitespaceManager.h +++ lib/Format/WhitespaceManager.h @@ -149,6 +149,14 @@ // the alignment process. const Change *StartOfBlockComment; int IndentationOffset; + + // A combination of nesting level and indent level, which are used in + // tandem to compute lexical scope, for the purposes of deciding + // when to stop consecutive alignment runs. + std::pair + nestingAndIndentLevel() const { + return std::make_pair(Tok->NestingLevel, Tok->IndentLevel); + } }; private: Index: lib/Format/WhitespaceManager.cpp =================================================================== --- lib/Format/WhitespaceManager.cpp +++ lib/Format/WhitespaceManager.cpp @@ -50,9 +50,9 @@ if (Tok.Finalized) return; Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue; - Changes.push_back(Change(Tok, /*CreateReplacement=*/true, - Tok.WhitespaceRange, Spaces, StartOfTokenColumn, - Newlines, "", "", InPPDirective && !Tok.IsFirst, + Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange, + Spaces, StartOfTokenColumn, Newlines, "", "", + InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/false)); } @@ -192,21 +192,56 @@ SmallVector &Changes) { bool FoundMatchOnLine = false; int Shift = 0; + + // ScopeStack keeps track of the current scope depth. It contains indices of + // the first token on each scope. + // We only run the "Matches" function on tokens from the outer-most scope. + // However, we do need to pay special attention to one class of tokens + // that are not in the outer-most scope, and that is function parameters + // which are split across multiple lines, as illustrated by this example: + // double a(int x); + // int b(int y, + // double z); + // In the above example, we need to take special care to ensure that + // 'double z' is indented along with it's owning function 'b'. + SmallVector ScopeStack; + for (unsigned i = Start; i != End; ++i) { - if (Changes[i].NewlinesBefore > 0) { - FoundMatchOnLine = false; + if (ScopeStack.size() != 0 && + Changes[i].nestingAndIndentLevel() < + Changes[ScopeStack.back()].nestingAndIndentLevel()) + ScopeStack.pop_back(); + + if (i != Start && Changes[i].nestingAndIndentLevel() > + Changes[i - 1].nestingAndIndentLevel()) + ScopeStack.push_back(i); + + bool InsideNestedScope = ScopeStack.size() != 0; + + if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) { Shift = 0; + FoundMatchOnLine = false; } // If this is the first matching token to be aligned, remember by how many // spaces it has to be shifted, so the rest of the changes on the line are // shifted by the same amount - if (!FoundMatchOnLine && Matches(Changes[i])) { + if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) { FoundMatchOnLine = true; Shift = Column - Changes[i].StartOfTokenColumn; Changes[i].Spaces += Shift; } + // This is for function parameters that are split across multiple lines, + // as mentioned in the ScopeStack comment. + if (InsideNestedScope && Changes[i].NewlinesBefore > 0) { + unsigned ScopeStart = ScopeStack.back(); + if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) || + (ScopeStart > Start + 1 && + Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName))) + Changes[i].Spaces += Shift; + } + assert(Shift >= 0); Changes[i].StartOfTokenColumn += Shift; if (i + 1 != Changes.size()) @@ -214,15 +249,37 @@ } } -// Walk through all of the changes and find sequences of matching tokens to -// align. To do so, keep track of the lines and whether or not a matching token -// was found on a line. If a matching token is found, extend the current -// sequence. If the current line cannot be part of a sequence, e.g. because -// there is an empty line before it or it contains only non-matching tokens, -// finalize the previous sequence. +// Walk through a subset of the changes, starting at StartAt, and find +// sequences of matching tokens to align. To do so, keep track of the lines and +// whether or not a matching token was found on a line. If a matching token is +// found, extend the current sequence. If the current line cannot be part of a +// sequence, e.g. because there is an empty line before it or it contains only +// non-matching tokens, finalize the previous sequence. +// The value returned is the token on which we stopped, either because we +// exhausted all items inside Changes, or because we hit a scope level higher +// than our initial scope. +// This function is recursive. Each invocation processes only the scope level +// equal to the initial level, which is the level of Changes[StartAt]. +// If we encounter a scope level greater than the initial level, then we call +// ourselves recursively, thereby avoiding the pollution of the current state +// with the alignment requirements of the nested sub-level. This recursive +// behavior is necessary for aligning function prototypes that have one or more +// arguments. +// If this function encounters a scope level less than the initial level, +// it returns the current position. +// There is a non-obvious subtlety in the recursive behavior: Even though we +// defer processing of nested levels to recursive invocations of this +// function, when it comes time to align a sequence of tokens, we run the +// alignment on the entire sequence, including the nested levels. +// When doing so, most of the nested tokens are skipped, because their +// alignment was already handled by the recursive invocations of this function. +// However, the special exception is that we do NOT skip function parameters +// that are split across multiple lines. See the test case in FormatTest.cpp +// that mentions "split function parameter alignment" for an example of this. template -static void AlignTokens(const FormatStyle &Style, F &&Matches, - SmallVector &Changes) { +static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, + SmallVector &Changes, + unsigned StartAt) { unsigned MinColumn = 0; unsigned MaxColumn = UINT_MAX; @@ -230,14 +287,11 @@ unsigned StartOfSequence = 0; unsigned EndOfSequence = 0; - // Keep track of the nesting level of matching tokens, i.e. the number of - // surrounding (), [], or {}. We will only align a sequence of matching - // token that share the same scope depth. - // - // FIXME: This could use FormatToken::NestingLevel information, but there is - // an outstanding issue wrt the brace scopes. - unsigned NestingLevelOfLastMatch = 0; - unsigned NestingLevel = 0; + // Measure the scope level (i.e. depth of (), [], {}) of the first token, and + // abort when we hit any token in a higher scope than the starting one. + auto NestingAndIndentLevel = StartAt < Changes.size() + ? Changes[StartAt].nestingAndIndentLevel() + : std::pair(0, 0); // Keep track of the number of commas before the matching tokens, we will only // align a sequence of matching tokens if they are preceded by the same number @@ -265,7 +319,11 @@ EndOfSequence = 0; }; - for (unsigned i = 0, e = Changes.size(); i != e; ++i) { + unsigned i = StartAt; + for (unsigned e = Changes.size(); i != e; ++i) { + if (Changes[i].nestingAndIndentLevel() < NestingAndIndentLevel) + break; + if (Changes[i].NewlinesBefore != 0) { CommasBeforeMatch = 0; EndOfSequence = i; @@ -279,29 +337,22 @@ if (Changes[i].Tok->is(tok::comma)) { ++CommasBeforeMatch; - } else if (Changes[i].Tok->isOneOf(tok::r_brace, tok::r_paren, - tok::r_square)) { - --NestingLevel; - } else if (Changes[i].Tok->isOneOf(tok::l_brace, tok::l_paren, - tok::l_square)) { - // We want sequences to skip over child scopes if possible, but not the - // other way around. - NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel); - ++NestingLevel; + } else if (Changes[i].nestingAndIndentLevel() > NestingAndIndentLevel) { + // Call AlignTokens recursively, skipping over this scope block. + unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i); + i = StoppedAt - 1; + continue; } if (!Matches(Changes[i])) continue; // If there is more than one matching token per line, or if the number of - // preceding commas, or the scope depth, do not match anymore, end the - // sequence. - if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch || - NestingLevel != NestingLevelOfLastMatch) + // preceding commas, do not match anymore, end the sequence. + if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch) AlignCurrentSequence(); CommasBeforeLastMatch = CommasBeforeMatch; - NestingLevelOfLastMatch = NestingLevel; FoundMatchOnLine = true; if (StartOfSequence == 0) @@ -324,8 +375,9 @@ MaxColumn = std::min(MaxColumn, ChangeMaxColumn); } - EndOfSequence = Changes.size(); + EndOfSequence = i; AlignCurrentSequence(); + return i; } void WhitespaceManager::alignConsecutiveAssignments() { @@ -344,7 +396,7 @@ return C.Tok->is(tok::equal); }, - Changes); + Changes, /*StartAt=*/0); } void WhitespaceManager::alignConsecutiveDeclarations() { @@ -357,13 +409,15 @@ // const char* const* v1; // float const* v2; // SomeVeryLongType const& v3; - AlignTokens(Style, [](Change const &C) { - return C.Tok->isOneOf(TT_StartOfName, - TT_FunctionDeclarationName); + // tok::kw_operator is necessary for aligning operator overload + // definitions. + return C.Tok->is(TT_StartOfName) || + C.Tok->is(TT_FunctionDeclarationName) || + C.Tok->is(tok::kw_operator); }, - Changes); + Changes, /*StartAt=*/0); } void WhitespaceManager::alignTrailingComments() { Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -7539,12 +7539,11 @@ "};", Alignment); - // FIXME: Should align all three assignments verifyFormat( "int i = 1;\n" "SomeType a = SomeFunction(looooooooooooooooooooooongParameterA,\n" " loooooooooooooooooooooongParameterB);\n" - "int j = 2;", + "int j = 2;", Alignment); verifyFormat("template 2) ? 3 : 4);\n" "float b[1][] = {{3.f}};\n", Alignment); + verifyFormat("for (int i = 0; i < 1; i++)\n" + " int x = 1;\n", + Alignment); + verifyFormat("for (i = 0; i < 1; i++)\n" + " x = 1;\n" + "y = 1;\n", + Alignment); } TEST_F(FormatTest, AlignConsecutiveDeclarations) { @@ -7626,7 +7632,57 @@ "unsigned oneTwoThree = 123;\n" "int oneTwo = 12;", Alignment)); + // Function prototype alignment + verifyFormat("int a();\n" + "double b();", + Alignment); + verifyFormat("int a(int x);\n" + "double b();", + Alignment); + unsigned OldColumnLimit = Alignment.ColumnLimit; + // We need to set ColumnLimit to zero, in order to stress nested alignments, + // otherwise the function parameters will be re-flowed onto a single line. + Alignment.ColumnLimit = 0; + EXPECT_EQ("int a(int x,\n" + " float y);\n" + "double b(int x,\n" + " double y);", + format("int a(int x,\n" + " float y);\n" + "double b(int x,\n" + " double y);", + Alignment)); + // This ensures that function parameters of function declarations are + // correctly indented when their owning functions are indented. + // The failure case here is for 'double y' to not be indented enough. + EXPECT_EQ("double a(int x);\n" + "int b(int y,\n" + " double z);", + format("double a(int x);\n" + "int b(int y,\n" + " double z);", + Alignment)); + // Set ColumnLimit low so that we induce wrapping immediately after + // the function name and opening paren. + Alignment.ColumnLimit = 13; + verifyFormat("int function(\n" + " int x,\n" + " bool y);", + Alignment); + Alignment.ColumnLimit = OldColumnLimit; + // Ensure function pointers don't screw up recursive alignment + verifyFormat("int a(int x, void (*fp)(int y));\n" + "double b();", + Alignment); Alignment.AlignConsecutiveAssignments = true; + // Ensure recursive alignment is broken by function braces, so that the + // "a = 1" does not align with subsequent assignments inside the function + // body. + verifyFormat("int func(int a = 1) {\n" + " int b = 2;\n" + " int cc = 3;\n" + "}", + Alignment); verifyFormat("float something = 2000;\n" "double another = 911;\n" "int i = 1, j = 10;\n" @@ -7636,6 +7692,28 @@ verifyFormat("int oneTwoThree = {0}; // comment\n" "unsigned oneTwo = 0; // comment", Alignment); + // Make sure that scope is correctly tracked, in the absence of braces + verifyFormat("for (int i = 0; i < n; i++)\n" + " j = i;\n" + "double x = 1;\n", + Alignment); + verifyFormat("if (int i = 0)\n" + " j = i;\n" + "double x = 1;\n", + Alignment); + // Ensure operator[] and operator() are comprehended + verifyFormat("struct test {\n" + " long long int foo();\n" + " int operator[](int a);\n" + " double bar();\n" + "};\n", + Alignment); + verifyFormat("struct test {\n" + " long long int foo();\n" + " int operator()(int a);\n" + " double bar();\n" + "};\n", + Alignment); EXPECT_EQ("void SomeFunction(int parameter = 0) {\n" " int const i = 1;\n" " int * j = 2;\n" @@ -7737,17 +7815,16 @@ Alignment); Alignment.AlignConsecutiveAssignments = false; - // FIXME: Should align all three declarations verifyFormat( "int i = 1;\n" "SomeType a = SomeFunction(looooooooooooooooooooooongParameterA,\n" " loooooooooooooooooooooongParameterB);\n" - "int j = 2;", + "int j = 2;", Alignment); // Test interactions with ColumnLimit and AlignConsecutiveAssignments: // We expect declarations and assignments to align, as long as it doesn't - // exceed the column limit, starting a new alignemnt sequence whenever it + // exceed the column limit, starting a new alignment sequence whenever it // happens. Alignment.AlignConsecutiveAssignments = true; Alignment.ColumnLimit = 30;