Index: lib/Format/Format.cpp =================================================================== --- lib/Format/Format.cpp +++ lib/Format/Format.cpp @@ -807,8 +807,10 @@ assert(FirstInLineIndex == 0); do { Tokens.push_back(getNextToken()); - if (Style.Language == FormatStyle::LK_JavaScript) + if (Style.Language == FormatStyle::LK_JavaScript) { tryParseJSRegexLiteral(); + tryParseTemplateString(); + } tryMergePreviousTokens(); if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; @@ -828,9 +830,6 @@ return; if (Style.Language == FormatStyle::LK_JavaScript) { - if (tryMergeTemplateString()) - return; - static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal, tok::equal}; @@ -992,92 +991,42 @@ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); } - bool tryMergeTemplateString() { - if (Tokens.size() < 2) - return false; - - FormatToken *EndBacktick = Tokens.back(); - // Backticks get lexed as tok::unknown tokens. If a template string contains - // a comment start, it gets lexed as a tok::comment, or tok::unknown if - // unterminated. - if (!EndBacktick->isOneOf(tok::comment, tok::string_literal, - tok::char_constant, tok::unknown)) - return false; - size_t CommentBacktickPos = EndBacktick->TokenText.find('`'); - // Unknown token that's not actually a backtick, or a comment that doesn't - // contain a backtick. - if (CommentBacktickPos == StringRef::npos) - return false; - - unsigned TokenCount = 0; - bool IsMultiline = false; - unsigned EndColumnInFirstLine = - EndBacktick->OriginalColumn + EndBacktick->ColumnWidth; - for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) { - ++TokenCount; - if (I[0]->IsMultiline) - IsMultiline = true; - - // If there was a preceding template string, this must be the start of a - // template string, not the end. - if (I[0]->is(TT_TemplateString)) - return false; + void tryParseTemplateString() { + FormatToken *BacktickToken = Tokens.back(); + if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`") + return; - if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") { - // Keep track of the rhs offset of the last token to wrap across lines - - // its the rhs offset of the first line of the template string, used to - // determine its width. - if (I[0]->IsMultiline) - EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth; - // If the token has newlines, the token before it (if it exists) is the - // rhs end of the previous line. - if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) { - EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth; - IsMultiline = true; - } - continue; - } + // 'Manually' lex ahead in the current file buffer. + const char *Offset = Lex->getBufferLocation(); + const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`" + for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) { + if (*Offset == '\\') + ++Offset; // Skip the escaped character. + } - Tokens.resize(Tokens.size() - TokenCount); - Tokens.back()->Type = TT_TemplateString; - const char *EndOffset = - EndBacktick->TokenText.data() + 1 + CommentBacktickPos; - if (CommentBacktickPos != 0) { - // If the backtick was not the first character (e.g. in a comment), - // re-lex after the backtick position. - SourceLocation Loc = EndBacktick->Tok.getLocation(); - resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1); - } - Tokens.back()->TokenText = - StringRef(Tokens.back()->TokenText.data(), - EndOffset - Tokens.back()->TokenText.data()); - - unsigned EndOriginalColumn = EndBacktick->OriginalColumn; - if (EndOriginalColumn == 0) { - SourceLocation Loc = EndBacktick->Tok.getLocation(); - EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc); - } - // If the ` is further down within the token (e.g. in a comment). - EndOriginalColumn += CommentBacktickPos; - - if (IsMultiline) { - // ColumnWidth is from backtick to last token in line. - // LastLineColumnWidth is 0 to backtick. - // x = `some content - // until here`; - Tokens.back()->ColumnWidth = - EndColumnInFirstLine - Tokens.back()->OriginalColumn; - // +1 for the ` itself. - Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1; - Tokens.back()->IsMultiline = true; - } else { - // Token simply spans from start to end, +1 for the ` itself. - Tokens.back()->ColumnWidth = - EndOriginalColumn - Tokens.back()->OriginalColumn + 1; - } - return true; + StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1); + BacktickToken->Type = TT_TemplateString; + BacktickToken->Tok.setKind(tok::string_literal); + BacktickToken->TokenText = LiteralText; + + // Adjust width for potentially multiline string literals. + size_t FirstBreak = LiteralText.find('\n'); + StringRef FirstLineText = FirstBreak == StringRef::npos + ? LiteralText + : LiteralText.substr(0, FirstBreak); + BacktickToken->ColumnWidth = encoding::columnWidthWithTabs( + FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding); + size_t LastBreak = LiteralText.rfind('\n'); + if (LastBreak != StringRef::npos) { + BacktickToken->IsMultiline = true; + unsigned StartColumn = 0; // The template tail spans the entire line. + BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs( + LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn, + Style.TabWidth, Encoding); } - return false; + + resetLexer( + SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1))); } bool tryMerge_TMacro() { Index: unittests/Format/FormatTestJS.cpp =================================================================== --- unittests/Format/FormatTestJS.cpp +++ unittests/Format/FormatTestJS.cpp @@ -1076,6 +1076,8 @@ getGoogleJSStyleWithColumns(34)); // Barely doesn't fit. verifyFormat("var x = `hello ${world}` >= some();", getGoogleJSStyleWithColumns(35)); // Barely fits. + verifyFormat("var x = `hellö ${wörld}` >= söme();", + getGoogleJSStyleWithColumns(35)); // Fits due to UTF-8. verifyFormat("var x = `hello\n" " ${world}` >=\n" " some();", @@ -1097,6 +1099,13 @@ getGoogleJSStyleWithColumns(13)); verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" " `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`);"); + // Repro for an obscure width-miscounting issue with template strings. + verifyFormat( + "someLongVariable =\n" + " " + "`${logPrefix[11]}/${logPrefix[12]}/${logPrefix[13]}${logPrefix[14]}`;", + "someLongVariable = " + "`${logPrefix[11]}/${logPrefix[12]}/${logPrefix[13]}${logPrefix[14]}`;"); // Make sure template strings get a proper ColumnWidth assigned, even if they // are first token in line.