Index: lib/Format/UnwrappedLineParser.h =================================================================== --- lib/Format/UnwrappedLineParser.h +++ lib/Format/UnwrappedLineParser.h @@ -81,6 +81,7 @@ void parsePPElse(); void parsePPEndIf(); void parsePPUnknown(); + void readTokenWithJavaScriptASI(); void parseStructuralElement(); bool tryToParseBracedList(); bool parseBracedList(bool ContinueOnSemicolons = false); Index: lib/Format/UnwrappedLineParser.cpp =================================================================== --- lib/Format/UnwrappedLineParser.cpp +++ lib/Format/UnwrappedLineParser.cpp @@ -660,6 +660,72 @@ Tok.isNot(tok::kw_noexcept); } +static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, + const FormatToken *FormatTok) { + if (FormatTok->Tok.isLiteral()) + return true; + // FIXME(martinprobst): This returns true for C/C++ keywords like 'struct'. + return FormatTok->is(tok::identifier) && + (FormatTok->Tok.getIdentifierInfo() == nullptr || + !FormatTok->isOneOf(Keywords.kw_in, Keywords.kw_of, + Keywords.kw_finally, Keywords.kw_function, + Keywords.kw_import, Keywords.kw_is, + Keywords.kw_let, Keywords.kw_var, + Keywords.kw_abstract, Keywords.kw_extends, + Keywords.kw_implements, Keywords.kw_instanceof, + Keywords.kw_interface, Keywords.kw_throws)); +} + +// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement +// when encountered after a value (see mustBeJSIdentOrValue). +static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, + const FormatToken *FormatTok) { + return FormatTok->isOneOf( + tok::kw_return, + // conditionals + tok::kw_if, tok::kw_else, + // loops + tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, + // switch/case + tok::kw_switch, tok::kw_case, + // exceptions + tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, + // declaration + tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, + Keywords.kw_function); +} + +// readTokenWithJavaScriptASI reads the next token and terminates the current +// line if JavaScript Automatic Semicolon Insertion must +// happen between the current token and the next token. +// +// This method is conservative - it cannot cover all edge cases of JavaScript, +// but only aims to correctly handle certain well known cases. It *must not* +// return true in speculative cases. +void UnwrappedLineParser::readTokenWithJavaScriptASI() { + FormatToken *Previous = FormatTok; + readToken(); + FormatToken *Next = FormatTok; + + bool IsOnSameLine = + CommentsBeforeNextToken.empty() + ? Next->NewlinesBefore == 0 + : CommentsBeforeNextToken.front()->NewlinesBefore == 0; + if (IsOnSameLine) + return; + + bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); + if (Next->is(tok::exclaim) && PreviousMustBeValue) + addUnwrappedLine(); + bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); + if (NextMustBeValue && (PreviousMustBeValue || + Previous->isOneOf(tok::r_square, tok::r_paren, + tok::plusplus, tok::minusminus))) + addUnwrappedLine(); + if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next)) + addUnwrappedLine(); +} + void UnwrappedLineParser::parseStructuralElement() { assert(!FormatTok->is(tok::l_brace)); if (Style.Language == FormatStyle::LK_TableGen && @@ -936,6 +1002,7 @@ return; } + // See if the following token should start a new unwrapped line. StringRef Text = FormatTok->TokenText; nextToken(); if (Line->Tokens.size() == 1 && @@ -1898,7 +1965,10 @@ return; flushComments(isOnNewLine(*FormatTok)); pushToken(FormatTok); - readToken(); + if (Style.Language != FormatStyle::LK_JavaScript) + readToken(); + else + readTokenWithJavaScriptASI(); } const FormatToken *UnwrappedLineParser::getPreviousToken() { Index: unittests/Format/FormatTestJS.cpp =================================================================== --- unittests/Format/FormatTestJS.cpp +++ unittests/Format/FormatTestJS.cpp @@ -49,8 +49,16 @@ static void verifyFormat( llvm::StringRef Code, const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_JavaScript)) { - std::string result = format(test::messUp(Code), Style); - EXPECT_EQ(Code.str(), result) << "Formatted:\n" << result; + std::string Result = format(test::messUp(Code), Style); + EXPECT_EQ(Code.str(), Result) << "Formatted:\n" << Result; + } + + static void verifyFormat( + llvm::StringRef Expected, + llvm::StringRef Code, + const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_JavaScript)) { + std::string Result = format(Code, Style); + EXPECT_EQ(Expected.str(), Result) << "Formatted:\n" << Result; } }; @@ -612,7 +620,7 @@ "}"); } -TEST_F(FormatTestJS, AutomaticSemicolonInsertion) { +TEST_F(FormatTestJS, WrapRespectsAutomaticSemicolonInsertion) { // The following statements must not wrap, as otherwise the program meaning // would change due to automatic semicolon insertion. // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.9.1. @@ -628,6 +636,54 @@ getGoogleJSStyleWithColumns(12)); } +TEST_F(FormatTestJS, AutomaticSemicolonInsertionHeuristic) { + verifyFormat("a\n" + "b;", + " a \n" + " b ;"); + verifyFormat("a()\n" + "b;", + " a ()\n" + " b ;"); + verifyFormat("a[b]\n" + "c;", + "a [b]\n" + "c ;"); + verifyFormat("1\n" + "a;", + "1 \n" + "a ;"); + verifyFormat("a\n" + "1;", + "a \n" + "1 ;"); + verifyFormat("a\n" + "'x';", + "a \n" + " 'x';"); + verifyFormat("a++\n" + "b;", + "a ++\n" + "b ;"); + verifyFormat("a\n" + "!b && c;", + "a \n" + " ! b && c;"); + verifyFormat("a\n" + "if (1) f();", + " a\n" + " if (1) f();"); + verifyFormat("a\n" + "class X {}", + " a\n" + " class X {}"); + verifyFormat("var a", "var\n" + "a"); + verifyFormat("x instanceof String", "x\n" + "instanceof\n" + "String"); +} + TEST_F(FormatTestJS, ClosureStyleCasts) { verifyFormat("var x = /** @type {foo} */ (bar);"); } @@ -726,13 +782,13 @@ verifyFormat("var regex = /\a\\//g;"); verifyFormat("var regex = /a\\//;\n" "var x = 0;"); - EXPECT_EQ("var regex = /'/g;", format("var regex = /'/g ;")); - EXPECT_EQ("var regex = /'/g; //'", format("var regex = /'/g ; //'")); - EXPECT_EQ("var regex = /\\/*/;\n" - "var x = 0;", - format("var regex = /\\/*/;\n" - "var x=0;")); - EXPECT_EQ("var x = /a\\//;", format("var x = /a\\// \n;")); + verifyFormat("var regex = /'/g;", "var regex = /'/g ;"); + verifyFormat("var regex = /'/g; //'", "var regex = /'/g ; //'"); + verifyFormat("var regex = /\\/*/;\n" + "var x = 0;", + "var regex = /\\/*/;\n" + "var x=0;"); + verifyFormat("var x = /a\\//;", "var x = /a\\// \n;"); verifyFormat("var regex = /\"/;", getGoogleJSStyleWithColumns(16)); verifyFormat("var regex =\n" " /\"/;", @@ -942,38 +998,37 @@ TEST_F(FormatTestJS, TemplateStrings) { // Keeps any whitespace/indentation within the template string. - EXPECT_EQ("var x = `hello\n" + verifyFormat("var x = `hello\n" " ${ name }\n" " !`;", - format("var x = `hello\n" + "var x = `hello\n" " ${ name }\n" - " !`;")); + " !`;"); verifyFormat("var x =\n" " `hello ${world}` >= some();", getGoogleJSStyleWithColumns(34)); // Barely doesn't fit. verifyFormat("var x = `hello ${world}` >= some();", getGoogleJSStyleWithColumns(35)); // Barely fits. - EXPECT_EQ("var x = `hello\n" + verifyFormat("var x = `hello\n" " ${world}` >=\n" " some();", - format("var x =\n" + "var x =\n" " `hello\n" " ${world}` >= some();", - getGoogleJSStyleWithColumns(21))); // Barely doesn't fit. - EXPECT_EQ("var x = `hello\n" + getGoogleJSStyleWithColumns(21)); // Barely doesn't fit. + verifyFormat("var x = `hello\n" " ${world}` >= some();", - format("var x =\n" + "var x =\n" " `hello\n" " ${world}` >= some();", - getGoogleJSStyleWithColumns(22))); // Barely fits. + getGoogleJSStyleWithColumns(22)); // Barely fits. verifyFormat("var x =\n" " `h`;", getGoogleJSStyleWithColumns(11)); - EXPECT_EQ( - "var x =\n `multi\n line`;", - format("var x = `multi\n line`;", getGoogleJSStyleWithColumns(13))); + verifyFormat("var x =\n `multi\n line`;", "var x = `multi\n line`;", + getGoogleJSStyleWithColumns(13)); verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" " `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`);"); @@ -987,39 +1042,38 @@ verifyFormat("var x = `hello` == `hello`;"); // Comments in template strings. - EXPECT_EQ("var x = `//a`;\n" - "var y;", - format("var x =\n `//a`;\n" - "var y ;")); - EXPECT_EQ("var x = `/*a`;\n" + verifyFormat("var x = `//a`;\n" "var y;", - format("var x =\n `/*a`;\n" - "var y;")); + "var x =\n `//a`;\n" + "var y ;"); + verifyFormat("var x = `/*a`;\n" + "var y;", + "var x =\n `/*a`;\n" + "var y;"); // Unterminated string literals in a template string. verifyFormat("var x = `'`; // comment with matching quote '\n" "var y;"); verifyFormat("var x = `\"`; // comment with matching quote \"\n" "var y;"); - EXPECT_EQ("it(`'aaaaaaaaaaaaaaa `, aaaaaaaaa);", - format("it(`'aaaaaaaaaaaaaaa `, aaaaaaaaa) ;", - getGoogleJSStyleWithColumns(40))); + verifyFormat("it(`'aaaaaaaaaaaaaaa `, aaaaaaaaa);", + "it(`'aaaaaaaaaaaaaaa `, aaaaaaaaa) ;", + getGoogleJSStyleWithColumns(40)); // Backticks in a comment - not a template string. - EXPECT_EQ("var x = 1 // `/*a`;\n" - " ;", - format("var x =\n 1 // `/*a`;\n" - " ;")); - EXPECT_EQ("/* ` */ var x = 1; /* ` */", - format("/* ` */ var x\n= 1; /* ` */")); + verifyFormat("var x = 1 // `/*a`;\n" + " ;", + "var x =\n 1 // `/*a`;\n" + " ;"); + verifyFormat("/* ` */ var x = 1; /* ` */", "/* ` */ var x\n= 1; /* ` */"); // Comment spans multiple template strings. - EXPECT_EQ("var x = `/*a`;\n" - "var y = ` */ `;", - format("var x =\n `/*a`;\n" - "var y =\n ` */ `;")); + verifyFormat("var x = `/*a`;\n" + "var y = ` */ `;", + "var x =\n `/*a`;\n" + "var y =\n ` */ `;"); // Escaped backtick. - EXPECT_EQ("var x = ` \\` a`;\n" - "var y;", - format("var x = ` \\` a`;\n" - "var y;")); + verifyFormat("var x = ` \\` a`;\n" + "var y;", + "var x = ` \\` a`;\n" + "var y;"); } TEST_F(FormatTestJS, CastSyntax) { verifyFormat("var x = foo;"); } @@ -1089,37 +1143,38 @@ } TEST_F(FormatTestJS, JSDocAnnotations) { - EXPECT_EQ("/**\n" - " * @export {this.is.a.long.path.to.a.Type}\n" - " */", - format("/**\n" - " * @export {this.is.a.long.path.to.a.Type}\n" - " */", - getGoogleJSStyleWithColumns(20))); + verifyFormat("/**\n" + " * @export {this.is.a.long.path.to.a.Type}\n" + " */", + "/**\n" + " * @export {this.is.a.long.path.to.a.Type}\n" + " */", + getGoogleJSStyleWithColumns(20)); } TEST_F(FormatTestJS, RequoteStringsSingle) { - EXPECT_EQ("var x = 'foo';", format("var x = \"foo\";")); - EXPECT_EQ("var x = 'fo\\'o\\'';", format("var x = \"fo'o'\";")); - EXPECT_EQ("var x = 'fo\\'o\\'';", format("var x = \"fo\\'o'\";")); - EXPECT_EQ("var x =\n" - " 'foo\\'';", - // Code below is 15 chars wide, doesn't fit into the line with the - // \ escape added. - format("var x = \"foo'\";", getGoogleJSStyleWithColumns(15))); + verifyFormat("var x = 'foo';", "var x = \"foo\";"); + verifyFormat("var x = 'fo\\'o\\'';", "var x = \"fo'o'\";"); + verifyFormat("var x = 'fo\\'o\\'';", "var x = \"fo\\'o'\";"); + verifyFormat( + "var x =\n" + " 'foo\\'';", + // Code below is 15 chars wide, doesn't fit into the line with the + // \ escape added. + "var x = \"foo'\";", getGoogleJSStyleWithColumns(15)); // Removes no-longer needed \ escape from ". - EXPECT_EQ("var x = 'fo\"o';", format("var x = \"fo\\\"o\";")); + verifyFormat("var x = 'fo\"o';", "var x = \"fo\\\"o\";"); // Code below fits into 15 chars *after* removing the \ escape. - EXPECT_EQ("var x = 'fo\"o';", - format("var x = \"fo\\\"o\";", getGoogleJSStyleWithColumns(15))); + verifyFormat("var x = 'fo\"o';", "var x = \"fo\\\"o\";", + getGoogleJSStyleWithColumns(15)); } TEST_F(FormatTestJS, RequoteStringsDouble) { FormatStyle DoubleQuotes = getGoogleStyle(FormatStyle::LK_JavaScript); DoubleQuotes.JavaScriptQuotes = FormatStyle::JSQS_Double; verifyFormat("var x = \"foo\";", DoubleQuotes); - EXPECT_EQ("var x = \"foo\";", format("var x = 'foo';", DoubleQuotes)); - EXPECT_EQ("var x = \"fo'o\";", format("var x = 'fo\\'o';", DoubleQuotes)); + verifyFormat("var x = \"foo\";", "var x = 'foo';", DoubleQuotes); + verifyFormat("var x = \"fo'o\";", "var x = 'fo\\'o';", DoubleQuotes); } TEST_F(FormatTestJS, RequoteStringsLeave) {