Index: clang/lib/Format/ContinuationIndenter.cpp
===================================================================
--- clang/lib/Format/ContinuationIndenter.cpp
+++ clang/lib/Format/ContinuationIndenter.cpp
@@ -1760,7 +1760,7 @@
                                            LineState &State, bool AllowBreak) {
   unsigned StartColumn = State.Column - Current.ColumnWidth;
   if (Current.isStringLiteral()) {
-    // FIXME: String literal breaking is currently disabled for C#,Java and
+    // FIXME: String literal breaking is currently disabled for C#, Java and
     // JavaScript, as it requires strings to be merged using "+" which we
     // don't support.
     if (Style.Language == FormatStyle::LK_Java ||
Index: clang/lib/Format/FormatTokenLexer.h
===================================================================
--- clang/lib/Format/FormatTokenLexer.h
+++ clang/lib/Format/FormatTokenLexer.h
@@ -79,6 +79,8 @@
   // nested template parts by balancing curly braces.
   void handleTemplateStrings();
 
+  void handleCSharpVerbatimAndInterpolatedStrings();
+
   void tryParsePythonComment();
 
   bool tryMerge_TMacro();
Index: clang/lib/Format/FormatTokenLexer.cpp
===================================================================
--- clang/lib/Format/FormatTokenLexer.cpp
+++ clang/lib/Format/FormatTokenLexer.cpp
@@ -57,6 +57,10 @@
     if (Style.Language == FormatStyle::LK_TextProto)
       tryParsePythonComment();
     tryMergePreviousTokens();
+    if (Style.isCSharp())
+      // This needs to come after tokens have been merged so that C#
+      // string literals are correctly identified.
+      handleCSharpVerbatimAndInterpolatedStrings();
     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
       FirstInLineIndex = Tokens.size() - 1;
   } while (Tokens.back()->Tok.isNot(tok::eof));
@@ -181,12 +185,12 @@
 // Search for verbatim or interpolated string literals @"ABC" or
 // $"aaaaa{abc}aaaaa" i and mark the token as TT_CSharpStringLiteral, and to
 // prevent splitting of @, $ and ".
+// Merging of multiline verbatim strings with embedded '"' is handled in
+// handleCSharpVerbatimAndInterpolatedStrings with lower-level lexing.
 bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
   if (Tokens.size() < 2)
     return false;
 
-  auto &CSharpStringLiteral = *(Tokens.end() - 2);
-
   // Interpolated strings could contain { } with " characters inside.
   // $"{x ?? "null"}"
   // should not be split into $"{x ?? ", null, "}" but should treated as a
@@ -236,27 +240,12 @@
     }
   }
 
-  // verbatim strings could contain "" which C# sees as an escaped ".
-  // @"""Hello""" will have been tokenized as @"" "Hello" "" and needs
-  // merging into a single string literal.
+  // Look for @"aaaaaa" or $"aaaaaa".
   auto &String = *(Tokens.end() - 1);
   if (!String->is(tok::string_literal))
     return false;
 
-  if (CSharpStringLiteral->Type == TT_CSharpStringLiteral &&
-      (CSharpStringLiteral->TokenText.startswith(R"(@")") ||
-       CSharpStringLiteral->TokenText.startswith(R"($@")"))) {
-    CSharpStringLiteral->TokenText = StringRef(
-        CSharpStringLiteral->TokenText.begin(),
-        String->TokenText.end() - CSharpStringLiteral->TokenText.begin());
-    CSharpStringLiteral->ColumnWidth += String->ColumnWidth;
-    Tokens.erase(Tokens.end() - 1);
-    return true;
-  }
-
   auto &At = *(Tokens.end() - 2);
-
-  // Look for @"aaaaaa" or $"aaaaaa".
   if (!(At->is(tok::at) || At->TokenText == "$"))
     return false;
 
@@ -498,6 +487,70 @@
   resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
 }
 
+// Extends a verbatim (@"..." or $@"...") C# string literal token so that it
+// covers the whole literal, including embedded newlines and "" escapes, by
+// scanning the raw buffer, then restarts the lexer just past the closing '"'.
+void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
+  FormatToken *CSharpStringLiteral = Tokens.back();
+
+  if (CSharpStringLiteral->Type != TT_CSharpStringLiteral)
+    return;
+
+  // Deal with multiline strings.
+  if (!(CSharpStringLiteral->TokenText.startswith(R"(@")") ||
+        CSharpStringLiteral->TokenText.startswith(R"($@")")))
+    return;
+
+  const char *StrBegin =
+      Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size();
+  const char *Offset = StrBegin;
+  if (CSharpStringLiteral->TokenText.startswith(R"(@")"))
+    Offset += 2;
+  else // CSharpStringLiteral->TokenText.startswith(R"($@")")
+    Offset += 3;
+
+  // Look for a terminating '"' in the current file buffer.
+  // Make no effort to format code within an interpolated or verbatim string.
+  for (; Offset != Lex->getBuffer().end(); ++Offset) {
+    if (Offset[0] == '"') {
+      // "" within a verbatim string is an escaped double quote: skip it.
+      if (Offset + 1 < Lex->getBuffer().end() && Offset[1] == '"')
+        ++Offset;
+      else
+        break;
+    }
+  }
+
+  // Make no attempt to format code properly if a verbatim string is
+  // unterminated.
+  if (Offset == Lex->getBuffer().end())
+    return;
+
+  StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
+  CSharpStringLiteral->TokenText = LiteralText;
+
+  // Adjust width for potentially multiline string literals.
+  size_t FirstBreak = LiteralText.find('\n');
+  StringRef FirstLineText = FirstBreak == StringRef::npos
+                                ? LiteralText
+                                : LiteralText.substr(0, FirstBreak);
+  CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs(
+      FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth,
+      Encoding);
+  size_t LastBreak = LiteralText.rfind('\n');
+  if (LastBreak != StringRef::npos) {
+    CSharpStringLiteral->IsMultiline = true;
+    unsigned StartColumn = 0; // The last line starts at column 0.
+    CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs(
+        LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
+        Style.TabWidth, Encoding);
+  }
+
+  // Offset points at the closing '"' here (the unterminated case returned
+  // above), so resume lexing at the character that follows it.
+  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
+}
+
 void FormatTokenLexer::handleTemplateStrings() {
   FormatToken *BacktickToken = Tokens.back();
 
Index: clang/unittests/Format/FormatTestCSharp.cpp
===================================================================
--- clang/unittests/Format/FormatTestCSharp.cpp
+++ clang/unittests/Format/FormatTestCSharp.cpp
@@ -412,9 +412,9 @@
 TEST_F(FormatTestCSharp, CSharpEscapedQuotesInVerbatimStrings) {
   FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp);
 
-  verifyFormat(R"(string str = @"""")", Style);
-  verifyFormat(R"(string str = @"""Hello world""")", Style);
-  verifyFormat(R"(string str = $@"""Hello {friend}""")", Style);
+  verifyFormat(R"(string str = @"""";)", Style);
+  verifyFormat(R"(string str = @"""Hello world""";)", Style);
+  verifyFormat(R"(string str = $@"""Hello {friend}""";)", Style);
 }
 
 TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
@@ -425,5 +425,19 @@
   verifyFormat(R"(string str3 = $"{braceCount}}} braces";)", Style);
 }
 
+TEST_F(FormatTestCSharp, CSharpNewlinesInVerbatimStrings) {
+  // Use MS style as Google Style inserts a line break before multiline strings.
+  FormatStyle Style = getMicrosoftStyle(FormatStyle::LK_CSharp);
+
+  std::string Code = R"(string s1 = $@"some code:
+ class {className} {{
+ {className}() {{}}
+ }}";)";
+
+  // verifyFormat does not understand multiline C# string-literals
+  // so check the format explicitly.
+  EXPECT_EQ(Code, format(Code, Style));
+}
+
 } // namespace format
 } // end namespace clang