diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -2611,6 +2611,8 @@ **BreakStringLiterals** (``Boolean``) :versionbadge:`clang-format 3.9` :ref:`¶ <BreakStringLiterals>` Allow breaking string literals when formatting. + In C, C++, and Objective-C: + .. code-block:: c++ true: @@ -2620,7 +2622,20 @@ false: const char* x = - "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + + In Verilog: + + .. code-block:: c++ + + true: + string x = {"veryVeryVeryVeryVeryVe", + "ryVeryVeryVeryVeryVery", + "VeryLongString"}; + + false: + string x = + "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; .. _ColumnLimit: diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -1902,6 +1902,8 @@ bool BreakAfterJavaFieldAnnotations; /// Allow breaking string literals when formatting. + /// + /// In C, C++, and Objective-C: /// \code /// true: /// const char* x = "veryVeryVeryVeryVeryVe" @@ -1910,8 +1912,21 @@ /// /// false: /// const char* x = - /// "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + /// "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + /// \endcode + /// + /// In Verilog: + /// \code + /// true: + /// string x = {"veryVeryVeryVeryVeryVe", + /// "ryVeryVeryVeryVeryVery", + /// "VeryLongString"}; + /// + /// false: + /// string x = + /// "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; /// \endcode + /// /// \version 3.9 bool BreakStringLiterals; diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h --- a/clang/lib/Format/BreakableToken.h +++ b/clang/lib/Format/BreakableToken.h @@ -160,7 +160,7 @@ /// Emits the previously retrieved \p Split via \p Whitespaces. 
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, - WhitespaceManager &Whitespaces) const = 0; + WhitespaceManager &Whitespaces) = 0; /// Returns the number of columns needed to format /// \p RemainingTokenColumns, assuming that Split is within the range measured @@ -204,7 +204,7 @@ /// Replaces the whitespace between \p LineIndex-1 and \p LineIndex. virtual void adaptStartOfLine(unsigned LineIndex, - WhitespaceManager &Whitespaces) const {} + WhitespaceManager &Whitespaces) {} /// Returns a whitespace range (offset, length) of the content at /// the last line that needs to be reformatted after the last line has been @@ -220,7 +220,7 @@ /// after the last line has been formatted by performing a reformatting. void replaceWhitespaceAfterLastLine(unsigned TailOffset, Split SplitAfterLastLine, - WhitespaceManager &Whitespaces) const { + WhitespaceManager &Whitespaces) { insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine, /*ContentIndent=*/0, Whitespaces); } @@ -258,7 +258,7 @@ const llvm::Regex &CommentPragmasRegex) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, - WhitespaceManager &Whitespaces) const override; + WhitespaceManager &Whitespaces) override; void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override {} unsigned getLineCount() const override; @@ -274,7 +274,9 @@ unsigned StartColumn; // The prefix a line needs after a break in the token. StringRef Prefix; - // The postfix a line needs before introducing a break. + // The closing quote of the string. Except in the + // BreakableVerilogStringLiteral subclass, it is also the postfix to be added + // to the first part when the string is broken. StringRef Postfix; // The token text excluding the prefix and postfix. 
StringRef Line; @@ -283,6 +285,34 @@ unsigned UnbreakableTailLength; }; +class BreakableVerilogStringLiteral : public BreakableStringLiteral { +public: + /// Creates a breakable token for a single line Verilog string literal. + /// + /// \p StartColumn specifies the column in which the token will start + /// after formatting. + BreakableVerilogStringLiteral(const FormatToken &Tok, unsigned StartColumn, + unsigned UnbreakableTailLength, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); + Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ContentStartColumn, + const llvm::Regex &CommentPragmasRegex) const override; + void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, + WhitespaceManager &Whitespaces) override; + +protected: + // Whether braces should be inserted around the string to form a + // concatenation. + bool BracesNeeded; + // The braces along with the quotes they replace. Depending on the style. + const StringRef LeftBraceQuote; + const StringRef RightBraceQuote; + // Width added to the left. + unsigned AdditionalStartColumnWidth; +}; + class BreakableComment : public BreakableToken { protected: /// Creates a breakable token for a comment. 
@@ -373,14 +403,14 @@ unsigned getContentIndent(unsigned LineIndex) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, - WhitespaceManager &Whitespaces) const override; + WhitespaceManager &Whitespaces) override; Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override; void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override; bool introducesBreakBeforeToken() const override; void adaptStartOfLine(unsigned LineIndex, - WhitespaceManager &Whitespaces) const override; + WhitespaceManager &Whitespaces) override; Split getSplitAfterLastLine(unsigned TailOffset) const override; bool mayReflow(unsigned LineIndex, @@ -445,13 +475,13 @@ unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, - WhitespaceManager &Whitespaces) const override; + WhitespaceManager &Whitespaces) override; Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override; void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override; void adaptStartOfLine(unsigned LineIndex, - WhitespaceManager &Whitespaces) const override; + WhitespaceManager &Whitespaces) override; void updateNextToken(LineState &State) const override; bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override; diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -286,12 +286,61 @@ void BreakableStringLiteral::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, - WhitespaceManager &Whitespaces) const { + WhitespaceManager &Whitespaces) { Whitespaces.replaceWhitespaceInToken( Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, Prefix, 
InPPDirective, 1, StartColumn); } +BreakableVerilogStringLiteral::BreakableVerilogStringLiteral( + const FormatToken &Tok, unsigned StartColumn, + unsigned UnbreakableTailLength, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableStringLiteral(Tok, StartColumn, /*Prefix=*/"\"", + /*Postfix=*/"\"", UnbreakableTailLength, + InPPDirective, Encoding, Style), + BracesNeeded(Tok.isNot(TT_VerilogStringInConcatenation)), + LeftBraceQuote(Style.Cpp11BracedListStyle ? "{\"" : "{ \""), + RightBraceQuote(Style.Cpp11BracedListStyle ? "\"}" : "\" }"), + AdditionalStartColumnWidth(BracesNeeded ? LeftBraceQuote.size() - 1u + : 0u) {} + +BreakableToken::Split BreakableVerilogStringLiteral::getSplit( + unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const { + // 2 is subtracted from the column limit for the closing quote and comma. + return getStringSplit( + Line.substr(TailOffset), ContentStartColumn + AdditionalStartColumnWidth, + std::max(3u, ColumnLimit) - 2, Style.TabWidth, Encoding); +} + +void BreakableVerilogStringLiteral::insertBreak( + unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, WhitespaceManager &Whitespaces) { + if (BracesNeeded) { + // For Verilog, concatenations need to be wrapped in braces. To add a + // brace, we replace the quote with a brace and another quote. This is + // because the rest of the program requires one replacement for each source + // range. If we replace the empty strings around the string, it may + // conflict with whitespace replacements between the string and adjacent + // tokens. 
+ BracesNeeded = false; + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"", + /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0, + /*Spaces=*/0); + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1, + /*PreviousPostfix=*/RightBraceQuote, + /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0); + } + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/1u + TailOffset + Split.first, + /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/"\",", + /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1, + /*Spaces=*/StartColumn + AdditionalStartColumnWidth); +} + BreakableComment::BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, @@ -597,7 +646,7 @@ void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, - WhitespaceManager &Whitespaces) const { + WhitespaceManager &Whitespaces) { StringRef Text = Content[LineIndex].substr(TailOffset); StringRef Prefix = Decoration; // We need this to account for the case when we have a decoration "* " for all @@ -672,8 +721,8 @@ /*Spaces=*/0); } -void BreakableBlockComment::adaptStartOfLine( - unsigned LineIndex, WhitespaceManager &Whitespaces) const { +void BreakableBlockComment::adaptStartOfLine(unsigned LineIndex, + WhitespaceManager &Whitespaces) { if (LineIndex == 0) { if (DelimitersOnNewline) { // Since we're breaking at index 1 below, the break position and the @@ -909,9 +958,10 @@ return ContentColumn[LineIndex]; } -void BreakableLineCommentSection::insertBreak( - unsigned LineIndex, unsigned TailOffset, Split Split, - unsigned ContentIndent, WhitespaceManager &Whitespaces) const { +void BreakableLineCommentSection::insertBreak(unsigned LineIndex, + unsigned TailOffset, Split Split, + unsigned ContentIndent, + WhitespaceManager &Whitespaces) { StringRef Text = 
Content[LineIndex].substr(TailOffset); // Compute the offset of the split relative to the beginning of the token // text. @@ -988,7 +1038,7 @@ } void BreakableLineCommentSection::adaptStartOfLine( - unsigned LineIndex, WhitespaceManager &Whitespaces) const { + unsigned LineIndex, WhitespaceManager &Whitespaces) { // If this is the first line of a token, we need to inform Whitespace Manager // about it: either adapt the whitespace range preceding it, or mark it as an // untouchable token. diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -2175,6 +2175,22 @@ return nullptr; StringRef Text = Current.TokenText; + // We need this to address the case where there is an unbreakable tail only + // if certain other formatting decisions have been taken. The + // UnbreakableTailLength of Current is an overapproximation in that case and + // we need to be correct here. + unsigned UnbreakableTailLength = (State.NextToken && canBreak(State)) + ? 0 + : Current.UnbreakableTailLength; + + if (Style.isVerilog()) { + if (Text.size() < 2u || !Text.startswith("\"") || !Text.endswith("\"")) + return nullptr; + return std::make_unique<BreakableVerilogStringLiteral>( + Current, StartColumn, UnbreakableTailLength, + State.Line->InPPDirective, Encoding, Style); + } + StringRef Prefix; StringRef Postfix; // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. @@ -2187,13 +2203,6 @@ Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { - // We need this to address the case where there is an unbreakable tail - // only if certain other formatting decisions have been taken. The - // UnbreakableTailLength of Current is an overapproximation is that case - // and we need to be correct here. - unsigned UnbreakableTailLength = (State.NextToken && canBreak(State)) - ? 
0 - : Current.UnbreakableTailLength; return std::make_unique<BreakableStringLiteral>( Current, StartColumn, Prefix, Postfix, UnbreakableTailLength, State.Line->InPPDirective, Encoding, Style); @@ -2235,7 +2244,7 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, LineState &State, bool AllowBreak, bool DryRun, bool Strict) { - std::unique_ptr<const BreakableToken> Token = + std::unique_ptr<BreakableToken> Token = createBreakableToken(Current, State, AllowBreak); if (!Token) return {0, false}; diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -163,6 +163,8 @@ TYPE(VerilogNumberBase) \ /* like `(strong1, pull0)` */ \ TYPE(VerilogStrength) \ + /* A string in a concatenation like `{"some ", "text"}`. */ \ + TYPE(VerilogStringInConcatenation) \ /* Things inside the table in user-defined primitives. */ \ TYPE(VerilogTableItem) \ /* those that separate ports of different types */ \ diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -862,6 +862,11 @@ OpeningBrace.Previous->is(TT_JsTypeColon)) { Contexts.back().IsExpression = false; } + if (Style.isVerilog() && + (!OpeningBrace.getPreviousNonComment() || + OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) { + Contexts.back().VerilogMayBeConcatenation = true; + } unsigned CommaCount = 0; while (CurrentToken) { @@ -1736,6 +1741,9 @@ bool InCpp11AttributeSpecifier = false; bool InCSharpAttributeSpecifier = false; bool VerilogAssignmentFound = false; + // Whether the braces may mean concatenation instead of structure or array + // literal. + bool VerilogMayBeConcatenation = false; enum { Unknown, // Like the part after `:` in a constructor. 
@@ -2068,6 +2076,14 @@ } else { Current.setType(TT_LineComment); } + } else if (Current.is(tok::string_literal)) { + if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation && + Current.getPreviousNonComment() && + Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) && + Current.getNextNonComment() && + Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) { + Current.setType(TT_VerilogStringInConcatenation); + } } else if (Current.is(tok::l_paren)) { if (lParenStartsCppCast(Current)) Current.setType(TT_CppCastLParen); diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -22,8 +22,13 @@ bool WhitespaceManager::Change::IsBeforeInFile::operator()( const Change &C1, const Change &C2) const { return SourceMgr.isBeforeInTranslationUnit( - C1.OriginalWhitespaceRange.getBegin(), - C2.OriginalWhitespaceRange.getBegin()); + C1.OriginalWhitespaceRange.getBegin(), + C2.OriginalWhitespaceRange.getBegin()) || + (C1.OriginalWhitespaceRange.getBegin() == + C2.OriginalWhitespaceRange.getBegin() && + SourceMgr.isBeforeInTranslationUnit( + C1.OriginalWhitespaceRange.getEnd(), + C2.OriginalWhitespaceRange.getEnd())); } WhitespaceManager::Change::Change(const FormatToken &Tok, @@ -1415,10 +1420,18 @@ void WhitespaceManager::generateChanges() { for (unsigned i = 0, e = Changes.size(); i != e; ++i) { const Change &C = Changes[i]; - if (i > 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() == - C.OriginalWhitespaceRange.getBegin()) { - // Do not generate two replacements for the same location. - continue; + if (i > 0) { + auto Last = Changes[i - 1].OriginalWhitespaceRange; + auto New = Changes[i].OriginalWhitespaceRange; + // Do not generate two replacements for the same location. 
As a special + // case, it is allowed if there is a replacement for the empty range + // between 2 tokens and another non-empty range at the start of the second + // token. + if (Last.getBegin() == New.getBegin() && + (Last.getEnd() != Last.getBegin() || + New.getEnd() == New.getBegin())) { + continue; + } } if (C.CreateReplacement) { std::string ReplacementText = C.PreviousLinePostfix; diff --git a/clang/unittests/Format/FormatTestVerilog.cpp b/clang/unittests/Format/FormatTestVerilog.cpp --- a/clang/unittests/Format/FormatTestVerilog.cpp +++ b/clang/unittests/Format/FormatTestVerilog.cpp @@ -1155,6 +1155,66 @@ verifyFormat("{<