Index: lib/Format/BreakableToken.h =================================================================== --- lib/Format/BreakableToken.h +++ lib/Format/BreakableToken.h @@ -21,6 +21,7 @@ #include "Encoding.h" #include "TokenAnnotator.h" #include "WhitespaceManager.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/Regex.h" #include @@ -135,6 +136,19 @@ virtual unsigned getContentStartColumn(unsigned LineIndex, bool Break) const = 0; + /// Returns additional content indent required for the second line after the + /// content at line \p LineIndex is broken. + /// + /// For example, Javadoc @param annotations require and indent of 4 spaces and + /// in this example getContentIndex(1) returns 4. + /// /** + /// * @param loooooooooooooong line + /// * continuation + /// */ + virtual unsigned getContentIndent(unsigned LineIndex) const { + return 0; + } + /// Returns a range (offset, length) at which to break the line at /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not /// violate \p ColumnLimit, assuming the text starting at \p TailOffset in @@ -146,6 +160,7 @@ /// Emits the previously retrieved \p Split via \p Whitespaces. virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, WhitespaceManager &Whitespaces) const = 0; /// Returns the number of columns needed to format @@ -210,7 +225,7 @@ Split SplitAfterLastLine, WhitespaceManager &Whitespaces) const { insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine, - Whitespaces); + /*ContentIndent=*/0, Whitespaces); } /// Updates the next token of \p State to the next token after this @@ -245,6 +260,7 @@ unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, WhitespaceManager &Whitespaces) const override; void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override {} @@ -354,7 +370,9 @@ unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override; unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; + unsigned getContentIndent(unsigned LineIndex) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, WhitespaceManager &Whitespaces) const override; Split getReflowSplit(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; @@ -368,6 +386,10 @@ bool mayReflow(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; + // Contains Javadoc annotations that require additional indent when continued + // on multiple lines. + static const llvm::StringSet<> ContentIndentingJavadocAnnotations; + private: // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex]. // @@ -423,6 +445,7 @@ unsigned StartColumn) const override; unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, WhitespaceManager &Whitespaces) const override; Split getReflowSplit(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; Index: lib/Format/BreakableToken.cpp =================================================================== --- lib/Format/BreakableToken.cpp +++ lib/Format/BreakableToken.cpp @@ -235,6 +235,7 @@ void BreakableStringLiteral::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, WhitespaceManager &Whitespaces) const { Whitespaces.replaceWhitespaceInToken( Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, @@ -510,8 +511,33 @@ return std::max(0, ContentColumn[LineIndex]); } +const llvm::StringSet<> + BreakableBlockComment::ContentIndentingJavadocAnnotations = { + "@param", "@return", "@returns", "@throws", "@type", + "@template", "@see", "@deprecated", "@define", +}; + +unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const { + if (Style.Language != FormatStyle::LK_Java && + Style.Language != FormatStyle::LK_JavaScript) + return 0; + // The content at LineIndex 0 of a comment like: + // /** line 0 */ + // is "* line 0", so we need to skip over the decoration in that case. + StringRef ContentWithNoDecoration = Content[LineIndex]; + if (LineIndex == 0 && ContentWithNoDecoration.startswith("*")) { + ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks); + } + StringRef FirstWord = ContentWithNoDecoration.substr( + 0, ContentWithNoDecoration.find_first_of(Blanks)); + if (ContentIndentingJavadocAnnotations.find(FirstWord) != + ContentIndentingJavadocAnnotations.end()) + return Style.ContinuationIndentWidth; + return 0; +} + void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, - Split Split, + Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const { StringRef Text = Content[LineIndex].substr(TailOffset); StringRef Prefix = Decoration; @@ -532,10 +558,14 @@ Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; unsigned CharsToRemove = Split.second; assert(LocalIndentAtLineBreak >= Prefix.size()); + std::string PrefixWithTrailingIndent = Prefix; + for (unsigned I = 0; I < ContentIndent; ++I) + PrefixWithTrailingIndent += " "; Whitespaces.replaceWhitespaceInToken( - tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix, - InPPDirective, /*Newlines=*/1, - /*Spaces=*/LocalIndentAtLineBreak - Prefix.size()); + tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", + PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1, + /*Spaces=*/LocalIndentAtLineBreak + ContentIndent - + PrefixWithTrailingIndent.size()); } BreakableToken::Split @@ -543,8 +573,17 @@ llvm::Regex &CommentPragmasRegex) const { if (!mayReflow(LineIndex, CommentPragmasRegex)) return Split(StringRef::npos, 0); - + + // If we're reflowing into a line with content indent, only reflow the next + // line if its starting whitespace matches the content indent. size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); + if (LineIndex) { + unsigned PreviousContentIndent = getContentIndent(LineIndex - 1); + if (PreviousContentIndent && Trimmed != StringRef::npos && + Trimmed != PreviousContentIndent) + return Split(StringRef::npos, 0); + } + return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); } @@ -583,7 +622,8 @@ // break length are the same. size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks); if (BreakLength != StringRef::npos) - insertBreak(LineIndex, 0, Split(1, BreakLength), Whitespaces); + insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0, + Whitespaces); } return; } @@ -754,7 +794,7 @@ void BreakableLineCommentSection::insertBreak( unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) const { + unsigned ContentIndent, WhitespaceManager &Whitespaces) const { StringRef Text = Content[LineIndex].substr(TailOffset); // Compute the offset of the split relative to the beginning of the token // text. Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -1782,6 +1782,7 @@ if (!DryRun) Token->adaptStartOfLine(0, Whitespaces); + unsigned ContentIndent = 0; unsigned Penalty = 0; LLVM_DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn << ".\n"); @@ -1903,11 +1904,28 @@ } } LLVM_DEBUG(llvm::dbgs() << " Breaking...\n"); - ContentStartColumn = - Token->getContentStartColumn(LineIndex, /*Break=*/true); + // Update the ContentIndent only if the current line was not reflown with + // the previous line, since in that case the previous line should still + // determine the ContentIndent. Also never intent the last line. + if (!Reflow) + ContentIndent = Token->getContentIndent(LineIndex); + LLVM_DEBUG(llvm::dbgs() + << " ContentIndent: " << ContentIndent << "\n"); + ContentStartColumn = ContentIndent + Token->getContentStartColumn( + LineIndex, /*Break=*/true); + unsigned NewRemainingTokenColumns = Token->getRemainingLength( LineIndex, TailOffset + Split.first + Split.second, ContentStartColumn); + if (NewRemainingTokenColumns == 0) { + // No content to indent. + ContentIndent = 0; + ContentStartColumn = + Token->getContentStartColumn(LineIndex, /*Break=*/true); + NewRemainingTokenColumns = Token->getRemainingLength( + LineIndex, TailOffset + Split.first + Split.second, + ContentStartColumn); + } // When breaking before a tab character, it may be moved by a few columns, // but will still be expanded to the next tab stop, so we don't save any @@ -1921,7 +1939,8 @@ LLVM_DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first << ", " << Split.second << "\n"); if (!DryRun) - Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); + Token->insertBreak(LineIndex, TailOffset, Split, ContentIndent, + Whitespaces); Penalty += NewBreakPenalty; TailOffset += Split.first + Split.second; Index: lib/Format/Format.cpp =================================================================== --- lib/Format/Format.cpp +++ lib/Format/Format.cpp @@ -808,10 +808,10 @@ GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.BreakBeforeTernaryOperators = false; - // taze:, triple slash directives (`/// <...`), @tag followed by { for a lot - // of JSDoc tags, and @see, which is commonly followed by overlong URLs. - GoogleStyle.CommentPragmas = - "(taze:|^/[ \t]*<|(@[A-Za-z_0-9-]+[ \\t]*{)|@see)"; + // taze:, triple slash directives (`/// <...`), @see, which is commonly + // followed by overlong URLs, and @exports and @module, which are followed + // by qualified identifiers. + GoogleStyle.CommentPragmas = "(taze:|^/[ \t]*<|@see|@exports|@module|@mods)"; GoogleStyle.MaxEmptyLinesToKeep = 3; GoogleStyle.NamespaceIndentation = FormatStyle::NI_All; GoogleStyle.SpacesInContainerLiterals = false; Index: unittests/Format/FormatTestComments.cpp =================================================================== --- unittests/Format/FormatTestComments.cpp +++ unittests/Format/FormatTestComments.cpp @@ -3105,6 +3105,106 @@ // clang-format on } +TEST_F(FormatTestComments, IndentsLongJavadocAnnotatedLines) { + FormatStyle Style = getGoogleStyle(FormatStyle::LK_Java); + Style.ColumnLimit = 60; + FormatStyle Style20 = getGoogleStyle(FormatStyle::LK_Java); + Style20.ColumnLimit = 20; + EXPECT_EQ( + "/**\n" + " * @param x long long long long long long long long long\n" + " * long\n" + " */\n", + format("/**\n" + " * @param x long long long long long long long long long long\n" + " */\n", + Style)); + EXPECT_EQ("/**\n" + " * @param x long long long long long long long long long\n" + " * long long long long long long long long long long\n" + " */\n", + format("/**\n" + " * @param x long long long long long long long long long " + "long long long long long long long long long long\n" + " */\n", + Style)); + EXPECT_EQ("/**\n" + " * @param x long long long long long long long long long\n" + " * long long long long long long long long long long\n" + " * long\n" + " */\n", + format("/**\n" + " * @param x long long long long long long long long long " + "long long long long long long long long long long long\n" + " */\n", + Style)); + EXPECT_EQ( + "/**\n" + " * Sentence that\n" + " * should be broken.\n" + " * @param short\n" + " * keep indentation\n" + " */\n", format( + "/**\n" + " * Sentence that should be broken.\n" + " * @param short\n" + " * keep indentation\n" + " */\n", Style20)); + + EXPECT_EQ("/**\n" + " * @param l1 long1\n" + " * to break\n" + " * @param l2 long2\n" + " * to break\n" + " */\n", + format("/**\n" + " * @param l1 long1 to break\n" + " * @param l2 long2 to break\n" + " */\n", + Style20)); + + EXPECT_EQ("/**\n" + " * @param xx to\n" + " * break\n" + " * no reflow\n" + " */\n", + format("/**\n" + " * @param xx to break\n" + " * no reflow\n" + " */\n", + Style20)); + + EXPECT_EQ("/**\n" + " * @param xx to\n" + " * break yes\n" + " * reflow\n" + " */\n", + format("/**\n" + " * @param xx to break\n" + " * yes reflow\n" + " */\n", + Style20)); + + FormatStyle JSStyle20 = getGoogleStyle(FormatStyle::LK_JavaScript); + JSStyle20.ColumnLimit = 20; + EXPECT_EQ("/**\n" + " * @param l1 long1\n" + " * to break\n" + " */\n", + format("/**\n" + " * @param l1 long1 to break\n" + " */\n", + JSStyle20)); + EXPECT_EQ("/**\n" + " * @param {l1 long1\n" + " * to break}\n" + " */\n", + format("/**\n" + " * @param {l1 long1 to break}\n" + " */\n", + JSStyle20)); +} + } // end namespace } // end namespace format } // end namespace clang Index: unittests/Format/FormatTestJS.cpp =================================================================== --- unittests/Format/FormatTestJS.cpp +++ unittests/Format/FormatTestJS.cpp @@ -191,19 +191,28 @@ // Break a single line long jsdoc comment pragma. EXPECT_EQ("/**\n" - " * @returns {string} jsdoc line 12\n" + " * @returns\n" + " * {string}\n" + " * jsdoc line 12\n" " */", format("/** @returns {string} jsdoc line 12 */", getGoogleJSStyleWithColumns(20))); EXPECT_EQ("/**\n" - " * @returns {string} jsdoc line 12\n" + " * @returns\n" + " * {string}\n" + " * jsdoc line 12\n" " */", format("/** @returns {string} jsdoc line 12 */", getGoogleJSStyleWithColumns(20))); + // FIXME: this overcounts the */ as a continuation of the 12 when breaking. + // Related to the FIXME in BreakableBlockComment::getRangeLength. EXPECT_EQ("/**\n" - " * @returns {string} jsdoc line 12\n" + " * @returns\n" + " * {string}\n" + " * jsdoc line\n" + " * 12\n" " */", format("/** @returns {string} jsdoc line 12*/", getGoogleJSStyleWithColumns(20))); @@ -212,7 +221,8 @@ EXPECT_EQ("/**\n" " * line 1\n" " * line 2\n" - " * @returns {string} jsdoc line 12\n" + " * @returns {string}\n" + " * jsdoc line 12\n" " */", format("/** line 1\n" " * line 2\n" @@ -2028,10 +2038,10 @@ TEST_F(FormatTestJS, JSDocAnnotations) { verifyFormat("/**\n" - " * @export {this.is.a.long.path.to.a.Type}\n" + " * @exports {this.is.a.long.path.to.a.Type}\n" " */", "/**\n" - " * @export {this.is.a.long.path.to.a.Type}\n" + " * @exports {this.is.a.long.path.to.a.Type}\n" " */", getGoogleJSStyleWithColumns(20)); verifyFormat("/**\n" @@ -2042,7 +2052,8 @@ " */", getGoogleJSStyleWithColumns(20)); verifyFormat("/**\n" - " * @param {this.is.a.long.path.to.a.Type}\n" + " * @param\n" + " * {this.is.a.long.path.to.a.Type}\n" " */", "/**\n" " * @param {this.is.a.long.path.to.a.Type}\n" @@ -2058,34 +2069,36 @@ verifyFormat( "/**\n" " * @param This is a\n" - " * long comment but\n" - " * no type\n" + " * long comment\n" + " * but no type\n" " */", "/**\n" " * @param This is a long comment but no type\n" " */", getGoogleJSStyleWithColumns(20)); - // Don't break @param line, but reindent it and reflow unrelated lines. - verifyFormat("{\n" - " /**\n" - " * long long long\n" - " * long\n" - " * @param {this.is.a.long.path.to.a.Type} a\n" - " * long long long\n" - " * long long\n" - " */\n" - " function f(a) {}\n" - "}", - "{\n" - "/**\n" - " * long long long long\n" - " * @param {this.is.a.long.path.to.a.Type} a\n" - " * long long long long\n" - " * long\n" - " */\n" - " function f(a) {}\n" - "}", - getGoogleJSStyleWithColumns(20)); + // Break and reindent @param line and reflow unrelated lines. + EXPECT_EQ("{\n" + " /**\n" + " * long long long\n" + " * long\n" + " * @param\n" + " * {this.is.a.long.path.to.a.Type}\n" + " * a\n" + " * long long long\n" + " * long long\n" + " */\n" + " function f(a) {}\n" + "}", + format("{\n" + "/**\n" + " * long long long long\n" + " * @param {this.is.a.long.path.to.a.Type} a\n" + " * long long long long\n" + " * long\n" + " */\n" + " function f(a) {}\n" + "}", + getGoogleJSStyleWithColumns(20))); } TEST_F(FormatTestJS, RequoteStringsSingle) {