diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -2722,6 +2722,8 @@ **BreakStringLiterals** (``Boolean``) :versionbadge:`clang-format 3.9` :ref:`ΒΆ ` Allow breaking string literals when formatting. + In C, C++, and Objective-C: + .. code-block:: c++ true: @@ -2731,7 +2733,34 @@ false: const char* x = - "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + + In C#, Java, and JavaScript: + + .. code-block:: c++ + + true: + var x = "veryVeryVeryVeryVeryVe" + + "ryVeryVeryVeryVeryVery" + + "VeryLongString"; + + false: + var x = + "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + C# and JavaScript interpolated strings are not broken. + + In Verilog: + + .. code-block:: c++ + + true: + string x = {"veryVeryVeryVeryVeryVe", + "ryVeryVeryVeryVeryVery", + "VeryLongString"}; + + false: + string x = + "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; .. _ColumnLimit: diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2008,6 +2008,8 @@ bool BreakAfterJavaFieldAnnotations; /// Allow breaking string literals when formatting. + /// + /// In C, C++, and Objective-C: /// \code /// true: /// const char* x = "veryVeryVeryVeryVeryVe" @@ -2016,8 +2018,34 @@ /// /// false: /// const char* x = - /// "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + /// "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + /// \endcode + /// + /// In C#, Java, and JavaScript: + /// \code + /// true: + /// var x = "veryVeryVeryVeryVeryVe" + + /// "ryVeryVeryVeryVeryVery" + + /// "VeryLongString"; + /// + /// false: + /// var x = + /// "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; /// \endcode + /// C# and JavaScript interpolated strings are not broken. + /// + /// In Verilog: + /// \code + /// true: + /// string x = {"veryVeryVeryVeryVeryVe", + /// "ryVeryVeryVeryVeryVery", + /// "VeryLongString"}; + /// + /// false: + /// string x = + /// "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + /// \endcode + /// /// \version 3.9 bool BreakStringLiterals; diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h --- a/clang/lib/Format/BreakableToken.h +++ b/clang/lib/Format/BreakableToken.h @@ -230,6 +230,11 @@ /// as a unit and is responsible for the formatting of the them. virtual void updateNextToken(LineState &State) const {} + /// Adds replacements that are needed when the token is broken. Such as + /// wrapping a JavaScript string in parentheses after it gets broken with plus + /// signs. + virtual void updateAfterBroken(WhitespaceManager &Whitespaces) const {} + protected: BreakableToken(const FormatToken &Tok, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) @@ -283,6 +288,44 @@ unsigned UnbreakableTailLength; }; +class BreakableStringLiteralUsingOperators : public BreakableStringLiteral { +public: + enum QuoteStyleType { + DoubleQuotes, // The string is quoted with double quotes. + SingleQuotes, // The JavaScript string is quoted with single quotes. + AtDoubleQuotes, // The C# verbatim string is quoted with the at sign and + // double quotes. + }; + /// Creates a breakable token for a single line string literal for C#, Java, + /// JavaScript, or Verilog. + /// + /// \p StartColumn specifies the column in which the token will start + /// after formatting. + BreakableStringLiteralUsingOperators( + const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, + unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style); + unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, + unsigned StartColumn) const override; + unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; + void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, + WhitespaceManager &Whitespaces) const override; + void updateAfterBroken(WhitespaceManager &Whitespaces) const override; + +protected: + // Whether braces or parentheses should be inserted around the string to form + // a concatenation. + bool BracesNeeded; + QuoteStyleType QuoteStyle; + // The braces or parentheses along with the first character which they + // replace, either a quote or at sign. + StringRef LeftBraceQuote; + StringRef RightBraceQuote; + // Width added to the left due to the added. Does not apply to the first line. + int ContinuationIndent; +}; + class BreakableComment : public BreakableToken { protected: /// Creates a breakable token for a comment. diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -292,6 +292,120 @@ Prefix, InPPDirective, 1, StartColumn); } +BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators( + const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, + unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableStringLiteral( + Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'" + : QuoteStyle == AtDoubleQuotes ? "@\"" + : "\"", + /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"", + UnbreakableTailLength, InPPDirective, Encoding, Style), + BracesNeeded(Tok.isNot(TT_StringInConcatenation)), + QuoteStyle(QuoteStyle) { + // Find the replacement text for inserting braces and quotes and line breaks. + // We don't create an allocated string concatenated from parts here because it + // has to outlive the BreakableStringliteral object. The brace replacements + // include a quote so that WhitespaceManager can tell it apart from whitespace + // replacements between the string and surrounding tokens. + + // The option is not implemented in JavaScript. + bool SignOnNewLine = + !Style.isJavaScript() && + Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; + + if (Style.isVerilog()) { + // In Verilog, all strings are quoted by double quotes, joined by commas, + // and wrapped in braces. The comma is always before the newline. + assert(QuoteStyle == DoubleQuotes); + LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \""; + RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }"; + Postfix = "\","; + Prefix = "\""; + } else { + // The plus sign may be on either line. And also C# and JavaScript have + // different quoting styles. + if (QuoteStyle == SingleQuotes) { + LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('"; + RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')"; + Postfix = SignOnNewLine ? "'" : "' +"; + Prefix = SignOnNewLine ? "+ '" : "'"; + } else { + if (QuoteStyle == AtDoubleQuotes) { + LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@"; + Prefix = SignOnNewLine ? "+ @\"" : "@\""; + } else { + LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\""; + Prefix = SignOnNewLine ? "+ \"" : "\""; + } + RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")"; + Postfix = SignOnNewLine ? "\"" : "\" +"; + } + } + + // Following lines are indented by the width of the brace and space if any. + ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0; + // The plus sign may need to be unindented depending on the style. + // FIXME: Add support for DontAlign. + if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus && + Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) { + ContinuationIndent -= 2; + } +} + +unsigned BreakableStringLiteralUsingOperators::getRemainingLength( + unsigned LineIndex, unsigned Offset, unsigned StartColumn) const { + return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) + + encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn, + Style.TabWidth, Encoding); +} + +unsigned +BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex, + bool Break) const { + return std::max( + 0, + static_cast(StartColumn) + + (Break ? ContinuationIndent + static_cast(Prefix.size()) + : (BracesNeeded ? static_cast(LeftBraceQuote.size()) - 1 + : 0) + + (QuoteStyle == AtDoubleQuotes ? 2 : 1))); +} + +void BreakableStringLiteralUsingOperators::insertBreak( + unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, WhitespaceManager &Whitespaces) const { + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset + + Split.first, + /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix, + /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1, + /*Spaces=*/ + std::max(0, static_cast(StartColumn) + ContinuationIndent)); +} + +void BreakableStringLiteralUsingOperators::updateAfterBroken( + WhitespaceManager &Whitespaces) const { + // Add the braces required for breaking the token if they are needed. + if (!BracesNeeded) + return; + + // To add a brace or parenthesis, we replace the quote (or the at sign) with a + // brace and another quote. This is because the rest of the program requires + // one replacement for each source range. If we replace the empty strings + // around the string, it may conflict with whitespace replacements between the + // string and adjacent tokens. + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"", + /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0, + /*Spaces=*/0); + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1, + /*PreviousPostfix=*/RightBraceQuote, + /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0); +} + BreakableComment::BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -36,6 +36,14 @@ return Style.IndentWrappedFunctionNames || LineType == LT_ObjCMethodDecl; } +// Returns true if a binary operator following \p Tok should be unindented when +// the style permits it. +static bool shouldUnindentNextOperator(const FormatToken &Tok) { + const FormatToken *Previous = Tok.getPreviousNonComment(); + return Previous && (Previous->getPrecedence() == prec::Assignment || + Previous->isOneOf(tok::kw_return, TT_RequiresClause)); +} + // Returns the length of everything up to the first possible line break after // the ), ], } or > matching \c Tok. static unsigned getLengthToMatchingParen(const FormatToken &Tok, @@ -1616,11 +1624,10 @@ if (Previous && Previous->endsSequence(tok::l_paren, tok::kw__Generic)) NewParenState.Indent = CurrentState.LastSpace; - if (Previous && - (Previous->getPrecedence() == prec::Assignment || - Previous->isOneOf(tok::kw_return, TT_RequiresClause) || - (PrecedenceLevel == prec::Conditional && Previous->is(tok::question) && - Previous->is(TT_ConditionalExpr))) && + if ((shouldUnindentNextOperator(Current) || + (Previous && + (PrecedenceLevel == prec::Conditional && + Previous->is(tok::question) && Previous->is(TT_ConditionalExpr)))) && !Newline) { // If BreakBeforeBinaryOperators is set, un-indent a bit to account for // the operator and keep the operands aligned. @@ -2183,14 +2190,9 @@ LineState &State, bool AllowBreak) { unsigned StartColumn = State.Column - Current.ColumnWidth; if (Current.isStringLiteral()) { - // FIXME: String literal breaking is currently disabled for C#, Java, Json - // and JavaScript, as it requires strings to be merged using "+" which we - // don't support. - if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || - Style.isCSharp() || Style.isJson() || !Style.BreakStringLiterals || - !AllowBreak) { + // Strings in JSON can not be broken. + if (Style.isJson() || !Style.BreakStringLiterals || !AllowBreak) return nullptr; - } // Don't break string literals inside preprocessor directives (except for // #define directives, as their contents are stored in separate lines and @@ -2209,6 +2211,33 @@ return nullptr; StringRef Text = Current.TokenText; + // We need this to address the case where there is an unbreakable tail only + // if certain other formatting decisions have been taken. The + // UnbreakableTailLength of Current is an overapproximation is that case and + // we need to be correct here. + unsigned UnbreakableTailLength = (State.NextToken && canBreak(State)) + ? 0 + : Current.UnbreakableTailLength; + + if (Style.isVerilog() || Style.Language == FormatStyle::LK_Java || + Style.isJavaScript() || Style.isCSharp()) { + BreakableStringLiteralUsingOperators::QuoteStyleType QuoteStyle; + if (Style.isJavaScript() && Text.startswith("'") && Text.endswith("'")) { + QuoteStyle = BreakableStringLiteralUsingOperators::SingleQuotes; + } else if (Style.isCSharp() && Text.startswith("@\"") && + Text.endswith("\"")) { + QuoteStyle = BreakableStringLiteralUsingOperators::AtDoubleQuotes; + } else if (Text.startswith("\"") && Text.endswith("\"")) { + QuoteStyle = BreakableStringLiteralUsingOperators::DoubleQuotes; + } else { + return nullptr; + } + return std::make_unique( + Current, QuoteStyle, + /*UnindentPlus=*/shouldUnindentNextOperator(Current), StartColumn, + UnbreakableTailLength, State.Line->InPPDirective, Encoding, Style); + } + StringRef Prefix; StringRef Postfix; // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. @@ -2221,13 +2250,6 @@ Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { - // We need this to address the case where there is an unbreakable tail - // only if certain other formatting decisions have been taken. The - // UnbreakableTailLength of Current is an overapproximation is that case - // and we need to be correct here. - unsigned UnbreakableTailLength = (State.NextToken && canBreak(State)) - ? 0 - : Current.UnbreakableTailLength; return std::make_unique( Current, StartColumn, Prefix, Postfix, UnbreakableTailLength, State.Line->InPPDirective, Encoding, Style); @@ -2628,6 +2650,9 @@ Current.UnbreakableTailLength; if (BreakInserted) { + if (!DryRun) + Token->updateAfterBroken(Whitespaces); + // If we break the token inside a parameter list, we need to break before // the next parameter on all levels, so that the next parameter is clearly // visible. Line comments already introduce a break. diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -134,6 +134,11 @@ TYPE(StartOfName) \ TYPE(StatementAttributeLikeMacro) \ TYPE(StatementMacro) \ + /* A string that is part of a string concatenation. For C#, JavaScript, and \ + * Java, it is used for marking whether a string needs parentheses around it \ + * if it is to be split into parts joined by `+`. For Verilog, whether \ + * braces need to be added to split it. Not used for other languages. */ \ + TYPE(StringInConcatenation) \ TYPE(StructLBrace) \ TYPE(StructuredBindingLSquare) \ TYPE(TemplateCloser) \ diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -863,6 +863,11 @@ OpeningBrace.Previous->is(TT_JsTypeColon)) { Contexts.back().IsExpression = false; } + if (Style.isVerilog() && + (!OpeningBrace.getPreviousNonComment() || + OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) { + Contexts.back().VerilogMayBeConcatenation = true; + } unsigned CommaCount = 0; while (CurrentToken) { @@ -1737,6 +1742,9 @@ bool InCpp11AttributeSpecifier = false; bool InCSharpAttributeSpecifier = false; bool VerilogAssignmentFound = false; + // Whether the braces may mean concatenation instead of structure or array + // literal. + bool VerilogMayBeConcatenation = false; enum { Unknown, // Like the part after `:` in a constructor. @@ -2069,6 +2077,14 @@ } else { Current.setType(TT_LineComment); } + } else if (Current.is(tok::string_literal)) { + if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation && + Current.getPreviousNonComment() && + Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) && + Current.getNextNonComment() && + Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) { + Current.setType(TT_StringInConcatenation); + } } else if (Current.is(tok::l_paren)) { if (lParenStartsCppCast(Current)) Current.setType(TT_CppCastLParen); @@ -2739,6 +2755,19 @@ Start = Current; } + if ((Style.isCSharp() || Style.isJavaScript() || + Style.Language == FormatStyle::LK_Java) && + Precedence == prec::Additive && Current) { + // A string can be broken without parentheses around it when it is + // already in a sequence of strings joined by `+` signs. + FormatToken *Prev = Current->getPreviousNonComment(); + if (Prev && Prev->is(tok::string_literal) && + (Prev == Start || Prev->endsSequence(tok::string_literal, tok::plus, + TT_StringInConcatenation))) { + Prev->setType(TT_StringInConcatenation); + } + } + // At the end of the line or when an operator with lower precedence is // found, insert fake parenthesis and return. if (!Current || diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -22,8 +22,13 @@ bool WhitespaceManager::Change::IsBeforeInFile::operator()( const Change &C1, const Change &C2) const { return SourceMgr.isBeforeInTranslationUnit( - C1.OriginalWhitespaceRange.getBegin(), - C2.OriginalWhitespaceRange.getBegin()); + C1.OriginalWhitespaceRange.getBegin(), + C2.OriginalWhitespaceRange.getBegin()) || + (C1.OriginalWhitespaceRange.getBegin() == + C2.OriginalWhitespaceRange.getBegin() && + SourceMgr.isBeforeInTranslationUnit( + C1.OriginalWhitespaceRange.getEnd(), + C2.OriginalWhitespaceRange.getEnd())); } WhitespaceManager::Change::Change(const FormatToken &Tok, @@ -1509,10 +1514,55 @@ void WhitespaceManager::generateChanges() { for (unsigned i = 0, e = Changes.size(); i != e; ++i) { const Change &C = Changes[i]; - if (i > 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() == - C.OriginalWhitespaceRange.getBegin()) { - // Do not generate two replacements for the same location. - continue; + if (i > 0) { + auto Last = Changes[i - 1].OriginalWhitespaceRange; + auto New = Changes[i].OriginalWhitespaceRange; + // Do not generate two replacements for the same location. As a special + // case, it is allowed if there is a replacement for the empty range + // between 2 tokens and another non-empty range at the start of the second + // token. We didn't implement logic to combine replacements for 2 + // consecutive source ranges into a single replacement, because the + // program works fine without it. + // + // We can't eliminate empty original whitespace ranges. They appear when + // 2 tokens have no whitespace in between in the input. It does not + // matter whether whitespace is to be added. If no whitespace is to be + // added, the replacement will be empty, and it gets eliminated after this + // step in storeReplacement. For example, if the input is `foo();`, + // there will be a replacement for the range between every consecutive + // pair of tokens. + // + // A replacement at the start of a token can be added by + // BreakableStringLiteralUsingOperators::insertBreak when it adds braces + // around the string literal. Say Verilog code is being formatted and the + // first line is to become the next 2 lines. + // x("long string"); + // x({"long ", + // "string"}); + // There will be a replacement for the empty range between the parenthesis + // and the string and another replacement for the quote character. The + // replacement for the empty range between the parenthesis and the quote + // comes from ContinuationIndenter::addTokenOnCurrentLine when it changes + // the original empty range between the parenthesis and the string to + // another empty one. The replacement for the quote character comes from + // BreakableStringLiteralUsingOperators::insertBreak when it adds the + // brace. In the example, the replacement for the empty range is the same + // as the original text. However, eliminating replacements that are same + // as the original does not help in general. For example, a newline can + // be inserted, causing the first line to become the next 3 lines. + // xxxxxxxxxxx("long string"); + // xxxxxxxxxxx( + // {"long ", + // "string"}); + // In that case, the empty range between the parenthesis and the string + // will be replaced by a newline and 4 spaces. So we will still have to + // deal with a replacement for an empty source range followed by a + // replacement for a non-empty source range. + if (Last.getBegin() == New.getBegin() && + (Last.getEnd() != Last.getBegin() || + New.getEnd() == New.getBegin())) { + continue; + } } if (C.CreateReplacement) { std::string ReplacementText = C.PreviousLinePostfix; diff --git a/clang/unittests/Format/FormatTestCSharp.cpp b/clang/unittests/Format/FormatTestCSharp.cpp --- a/clang/unittests/Format/FormatTestCSharp.cpp +++ b/clang/unittests/Format/FormatTestCSharp.cpp @@ -6,18 +6,21 @@ // //===----------------------------------------------------------------------===// -#include "FormatTestUtils.h" -#include "clang/Format/Format.h" -#include "llvm/Support/Debug.h" -#include "gtest/gtest.h" +#include "FormatTestBase.h" #define DEBUG_TYPE "format-test" namespace clang { namespace format { +namespace test { +namespace { -class FormatTestCSharp : public ::testing::Test { +class FormatTestCSharp : public test::FormatTestBase { protected: + FormatStyle getDefaultStyle() const override { + return getMicrosoftStyle(FormatStyle::LK_CSharp); + } + static std::string format(llvm::StringRef Code, unsigned Offset, unsigned Length, const FormatStyle &Style) { LLVM_DEBUG(llvm::errs() << "---\n"); @@ -41,13 +44,6 @@ Style.ColumnLimit = ColumnLimit; return Style; } - - static void verifyFormat( - llvm::StringRef Code, - const FormatStyle &Style = getMicrosoftStyle(FormatStyle::LK_CSharp)) { - EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable"; - EXPECT_EQ(Code.str(), format(test::messUp(Code), Style)); - } }; TEST_F(FormatTestCSharp, CSharpClass) { @@ -129,9 +125,65 @@ } TEST_F(FormatTestCSharp, NoStringLiteralBreaks) { + // Breaking of interpolated strings is not implemented. + auto Style = getDefaultStyle(); + Style.ColumnLimit = 40; + Style.BreakStringLiterals = true; + verifyFormat("foo(" + "$\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaa\");", + Style); +} + +TEST_F(FormatTestCSharp, StringLiteralBreaks) { + // The line is 75 characters long. The default limit for the Microsoft style + // is 120. + auto Style = getDefaultStyle(); + Style.BreakStringLiterals = true; verifyFormat("foo(" "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaa\");"); + "aaaaaa\");", + Style); + // When the column limit is smaller, the string should get broken. + Style.ColumnLimit = 40; + verifyFormat(R"(foo("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + "aaa");)", + "foo(" + "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaa\");", + Style); + // The new quotes should be the same as the original. + verifyFormat(R"(foo(@"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + @"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + @"aaaaa");)", + "foo(" + "@\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaa\");", + Style); + // The operators can be on either line. + Style.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; + verifyFormat(R"(foo("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + "a");)", + "foo(" + "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaa\");", + Style); + Style.AlignOperands = FormatStyle::OAS_AlignAfterOperator; + verifyFormat(R"(foo("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + "a");)", + "foo(" + "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaa\");", + Style); + verifyFormat(R"(x = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";)", + "x = " + "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaa\";", + Style); } TEST_F(FormatTestCSharp, CSharpVerbatiumStringLiterals) { @@ -166,7 +218,7 @@ } TEST_F(FormatTestCSharp, CSharpFatArrows) { - verifyFormat("Task serverTask = Task.Run(async() => {"); + verifyIncompleteFormat("Task serverTask = Task.Run(async() => {"); verifyFormat("public override string ToString() => \"{Name}\\{Age}\";"); } @@ -282,7 +334,7 @@ "listening on provided host\")]\n" "public string Host { set; get; }"); - verifyFormat( + verifyIncompleteFormat( "[DllImport(\"Hello\", EntryPoint = \"hello_world\")]\n" "// The const char* returned by hello_world must not be deleted.\n" "private static extern IntPtr HelloFromCpp();)"); @@ -1138,7 +1190,8 @@ Style); verifyFormat(R"(Apply(x => x.Name, x => () => x.ID);)", Style); verifyFormat(R"(bool[] xs = { true, true };)", Style); - verifyFormat(R"(taskContext.Factory.Run(async () => doThing(args);)", Style); + verifyIncompleteFormat( + R"(taskContext.Factory.Run(async () => doThing(args);)", Style); verifyFormat(R"(catch (TestException) when (innerFinallyExecuted))", Style); verifyFormat(R"(private float[,] Values;)", Style); verifyFormat(R"(Result this[Index x] => Foo(x);)", Style); @@ -1612,5 +1665,7 @@ EXPECT_NE("", format("int where b <")); // reduced from crasher } +} // namespace +} // namespace test } // namespace format -} // end namespace clang +} // namespace clang diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp --- a/clang/unittests/Format/FormatTestJS.cpp +++ b/clang/unittests/Format/FormatTestJS.cpp @@ -1505,6 +1505,97 @@ TEST_F(FormatTestJS, StringLiteralConcatenation) { verifyFormat("var literal = 'hello ' +\n" " 'world';"); + + // Long strings should be broken. + verifyFormat("var literal =\n" + " 'xxxxxxxx ' +\n" + " 'xxxxxxxx';", + "var literal = 'xxxxxxxx xxxxxxxx';", + getGoogleJSStyleWithColumns(17)); + verifyFormat("var literal =\n" + " 'xxxxxxxx ' +\n" + " 'xxxxxxxx';", + "var literal = 'xxxxxxxx xxxxxxxx';", + getGoogleJSStyleWithColumns(18)); + verifyFormat("var literal =\n" + " 'xxxxxxxx' +\n" + " ' xxxxxxxx';", + "var literal = 'xxxxxxxx xxxxxxxx';", + getGoogleJSStyleWithColumns(16)); + // The quotes should be correct. + for (char OriginalQuote : {'\'', '"'}) { + auto VerifyQuotes = [=](FormatStyle::JavaScriptQuoteStyle StyleQuote, + char TargetQuote) { + auto Style = getGoogleJSStyleWithColumns(17); + Style.JavaScriptQuotes = StyleQuote; + SmallString<48> Target{"var literal =\n" + " \"xxxxxxxx \" +\n" + " \"xxxxxxxx\";"}; + SmallString<34> Original{"var literal = \"xxxxxxxx xxxxxxxx\";"}; + std::replace(Target.begin(), Target.end(), '"', TargetQuote); + std::replace(Original.begin(), Original.end(), '"', OriginalQuote); + verifyFormat(Target, Original, Style); + }; + VerifyQuotes(FormatStyle::JSQS_Leave, OriginalQuote); + VerifyQuotes(FormatStyle::JSQS_Single, '\''); + VerifyQuotes(FormatStyle::JSQS_Double, '"'); + } + // Parentheses should be added when necessary. + verifyFormat("var literal =\n" + " ('xxxxxxxx ' +\n" + " 'xxxxxx')[0];", + "var literal = 'xxxxxxxx xxxxxx'[0];", + getGoogleJSStyleWithColumns(18)); + auto Style = getGoogleJSStyleWithColumns(20); + Style.SpacesInParens = FormatStyle::SIPO_Custom; + Style.SpacesInParensOptions.Other = true; + verifyFormat("var literal =\n" + " ( 'xxxxxxxx ' +\n" + " 'xxxxxx' )[0];", + "var literal = 'xxxxxxxx xxxxxx'[0];", Style); + // FIXME: When the part before the string literal is shorter than the + // continuation indentation, and the option AlignAfterOpenBracket is set to + // AlwaysBreak which is the default for the Google style, the unbroken string + // does not get to a new line while the broken string does due to the added + // parentheses. The formatter does not do it in one pass. + EXPECT_EQ( + "x = ('xxxxxxxx ' +\n" + " 'xxxxxx')[0];", + format("x = 'xxxxxxxx xxxxxx'[0];", getGoogleJSStyleWithColumns(18))); + verifyFormat("x =\n" + " ('xxxxxxxx ' +\n" + " 'xxxxxx')[0];", + getGoogleJSStyleWithColumns(18)); + // Breaking of template strings ans regular expressions is not implemented. + verifyFormat("var literal =\n" + " `xxxxxxxx xxxxxxxx`;", + getGoogleJSStyleWithColumns(18)); + verifyFormat("var literal =\n" + " /xxxxxxxx xxxxxxxx/;", + getGoogleJSStyleWithColumns(18)); + // There can be breaks in the code inside a template string. + verifyFormat("var literal = `xxxxxx ${\n" + " xxxxxxxxxx} xxxxxx`;", + "var literal = `xxxxxx ${xxxxxxxxxx} xxxxxx`;", + getGoogleJSStyleWithColumns(14)); + verifyFormat("var literal = `xxxxxx ${\n" + " xxxxxxxxxx} xxxxxx`;", + "var literal = `xxxxxx ${xxxxxxxxxx} xxxxxx`;", + getGoogleJSStyleWithColumns(15)); + // Identifiers inside the code inside a template string should not be broken + // even if the column limit is exceeded. This following behavior is not + // optimal. The part after the closing brace which exceeds the column limit + // can be put on a new line. Change this test when it is implemented. + verifyFormat("var literal = `xxxxxx ${\n" + " xxxxxxxxxxxxxxxxxxxxxx} xxxxxx`;", + "var literal = `xxxxxx ${xxxxxxxxxxxxxxxxxxxxxx} xxxxxx`;", + getGoogleJSStyleWithColumns(14)); + verifyFormat("var literal = `xxxxxx ${\n" + " xxxxxxxxxxxxxxxxxxxxxx +\n" + " xxxxxxxxxxxxxxxxxxxxxx} xxxxxx`;", + "var literal = `xxxxxx ${xxxxxxxxxxxxxxxxxxxxxx + " + "xxxxxxxxxxxxxxxxxxxxxx} xxxxxx`;", + getGoogleJSStyleWithColumns(14)); } TEST_F(FormatTestJS, RegexLiteralClassification) { diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -6,34 +6,19 @@ // //===----------------------------------------------------------------------===// -#include "FormatTestUtils.h" -#include "clang/Format/Format.h" -#include "llvm/Support/Debug.h" -#include "gtest/gtest.h" +#include "FormatTestBase.h" #define DEBUG_TYPE "format-test" namespace clang { namespace format { +namespace test { +namespace { -class FormatTestJava : public ::testing::Test { +class FormatTestJava : public test::FormatTestBase { protected: - static std::string format(llvm::StringRef Code, unsigned Offset, - unsigned Length, const FormatStyle &Style) { - LLVM_DEBUG(llvm::errs() << "---\n"); - LLVM_DEBUG(llvm::errs() << Code << "\n\n"); - std::vector Ranges(1, tooling::Range(Offset, Length)); - tooling::Replacements Replaces = reformat(Style, Code, Ranges); - auto Result = applyAllReplacements(Code, Replaces); - EXPECT_TRUE(static_cast(Result)); - LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n"); - return *Result; - } - - static std::string - format(llvm::StringRef Code, - const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_Java)) { - return format(Code, 0, Code.size(), Style); + FormatStyle getDefaultStyle() const override { + return getGoogleStyle(FormatStyle::LK_Java); } static FormatStyle getStyleWithColumns(unsigned ColumnLimit) { @@ -41,13 +26,6 @@ Style.ColumnLimit = ColumnLimit; return Style; } - - static void verifyFormat( - llvm::StringRef Code, - const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_Java)) { - EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable"; - EXPECT_EQ(Code.str(), format(test::messUp(Code), Style)); - } }; TEST_F(FormatTestJava, NoAlternativeOperatorNames) { @@ -565,9 +543,9 @@ } TEST_F(FormatTestJava, BreaksStringLiterals) { - // FIXME: String literal breaking is currently disabled for Java and JS, as it - // requires strings to be merged using "+" which we don't support. - verifyFormat("\"some text other\";", getStyleWithColumns(14)); + verifyFormat("x = \"some text \"\n" + " + \"other\";", + "x = \"some text other\";", getStyleWithColumns(18)); } TEST_F(FormatTestJava, AlignsBlockComments) { @@ -625,5 +603,7 @@ Style); } +} // namespace +} // namespace test } // namespace format -} // end namespace clang +} // namespace clang diff --git a/clang/unittests/Format/FormatTestVerilog.cpp b/clang/unittests/Format/FormatTestVerilog.cpp --- a/clang/unittests/Format/FormatTestVerilog.cpp +++ b/clang/unittests/Format/FormatTestVerilog.cpp @@ -1157,6 +1157,66 @@ verifyFormat("{<