# clang-format (JavaScript/TypeScript): stop treating C++-only keywords as
# JavaScript keywords.
#
# clang/lib/Format/FormatToken.h
#   IsJavaScriptIdentifier() now switches on the token kind instead of only
#   accepting tok::identifier:
#     * the listed JS/TS keywords that clang lexes as keywords -> false
#     * tok::identifier -> true unless the identifier is in JsExtraKeywords
#     * every other keyword enumerated by TokenKinds.def -> true
#     * all remaining tokens (punctuation etc.) -> false
#   Two switch statements are used so that the KEYWORD/#include expansion does
#   not produce duplicate case labels.
#
# clang/lib/Format/TokenAnnotator.cpp
#   * The "!" non-null assertion check calls IsJavaScriptIdentifier() on the
#     previous token instead of testing for tok::identifier.
#   * The tagged-template-literal check drops its explicit
#     Left.is(tok::identifier) test and relies on IsJavaScriptIdentifier().
#
# clang/unittests/Format/FormatTestJS.cpp
#   The CppKeywords test is moved further down the file and extended with
#   "<keyword>!;" cases; the Qt case becomes "const slots: Slot[];".
#
# NOTE(review): this patch was recovered from a copy whose line breaks and
# indentation had been collapsed. Indentation below is reconstructed from
# LLVM's two-space style and should be confirmed against the target files;
# applying with a whitespace-tolerant option (e.g. `git apply
# --ignore-whitespace`) is the safer choice until that is verified.
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -910,9 +910,64 @@
   /// Returns \c true if \p Tok is a true JavaScript identifier, returns
   /// \c false if it is a keyword or a pseudo keyword.
   bool IsJavaScriptIdentifier(const FormatToken &Tok) const {
-    return Tok.is(tok::identifier) &&
-           JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
-               JsExtraKeywords.end();
+    // Based on the list of JavaScript & TypeScript keywords here:
+    // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74
+    switch (Tok.Tok.getKind()) {
+    case tok::kw_break:
+    case tok::kw_case:
+    case tok::kw_catch:
+    case tok::kw_class:
+    case tok::kw_continue:
+    case tok::kw_const:
+    case tok::kw_default:
+    case tok::kw_delete:
+    case tok::kw_do:
+    case tok::kw_else:
+    case tok::kw_enum:
+    case tok::kw_export:
+    case tok::kw_false:
+    case tok::kw_for:
+    case tok::kw_if:
+    case tok::kw_import:
+    case tok::kw_module:
+    case tok::kw_new:
+    case tok::kw_private:
+    case tok::kw_protected:
+    case tok::kw_public:
+    case tok::kw_return:
+    case tok::kw_static:
+    case tok::kw_switch:
+    case tok::kw_this:
+    case tok::kw_throw:
+    case tok::kw_true:
+    case tok::kw_try:
+    case tok::kw_typeof:
+    case tok::kw_void:
+    case tok::kw_while:
+      // These are JS keywords that are lexed by LLVM/clang as keywords.
+      return false;
+    case tok::identifier:
+      // For identifiers, make sure they are true identifiers, excluding the
+      // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords).
+      return JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
+             JsExtraKeywords.end();
+    default:
+      // Other keywords are handled in the switch below, to avoid problems due
+      // to duplicate case labels when using the #include trick.
+      break;
+    }
+
+    switch (Tok.Tok.getKind()) {
+      // Handle C++ keywords not included above: these are all JS identifiers.
+#define KEYWORD(X, Y) case tok::kw_##X:
+#include "clang/Basic/TokenKinds.def"
+      // #undef KEYWORD is not needed -- it's #undef-ed at the end of
+      // TokenKinds.def
+      return true;
+    default:
+      // All other tokens (punctuation etc) are not JS identifiers.
+      return false;
+    }
   }
 
   /// Returns \c true if \p Tok is a C# keyword, returns
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1522,9 +1522,9 @@
     if (Style.Language == FormatStyle::LK_JavaScript) {
       if (Current.is(tok::exclaim)) {
         if (Current.Previous &&
-            (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
-                                       tok::r_paren, tok::r_square,
-                                       tok::r_brace) ||
+            (Keywords.IsJavaScriptIdentifier(*Current.Previous) ||
+             Current.Previous->isOneOf(tok::kw_namespace, tok::r_paren,
+                                       tok::r_square, tok::r_brace) ||
              Current.Previous->Tok.isLiteral())) {
           Current.Type = TT_JsNonNullAssertion;
           return;
@@ -3070,10 +3070,8 @@
         (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
       return false;
     // In tagged template literals ("html`bar baz`"), there is no space between
-    // the tag identifier and the template string. getIdentifierInfo makes sure
-    // that the identifier is not a pseudo keyword like `yield`, either.
-    if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) &&
-        Right.is(TT_TemplateString))
+    // the tag identifier and the template string.
+    if (Keywords.IsJavaScriptIdentifier(Left) && Right.is(TT_TemplateString))
       return false;
     if (Right.is(tok::star) &&
         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp
--- a/clang/unittests/Format/FormatTestJS.cpp
+++ b/clang/unittests/Format/FormatTestJS.cpp
@@ -386,13 +386,6 @@
                "return (x);\n");
 }
 
-TEST_F(FormatTestJS, CppKeywords) {
-  // Make sure we don't mess stuff up because of C++ keywords.
-  verifyFormat("return operator && (aa);");
-  // .. or QT ones.
-  verifyFormat("slots: Slot[];");
-}
-
 TEST_F(FormatTestJS, ES6DestructuringAssignment) {
   verifyFormat("var [a, b, c] = [1, 2, 3];");
   verifyFormat("const [a, b, c] = [1, 2, 3];");
@@ -2366,6 +2359,61 @@
   verifyFormat("return !!x;\n");
 }
 
+TEST_F(FormatTestJS, CppKeywords) {
+  // Make sure we don't mess stuff up because of C++ keywords.
+  verifyFormat("return operator && (aa);");
+  // .. or QT ones.
+  verifyFormat("const slots: Slot[];");
+  // Use the "!" assertion operator to validate that clang-format understands
+  // these C++ keywords aren't keywords in JS/TS.
+  verifyFormat("auto!;");
+  verifyFormat("char!;");
+  verifyFormat("concept!;");
+  verifyFormat("double!;");
+  verifyFormat("extern!;");
+  verifyFormat("float!;");
+  verifyFormat("inline!;");
+  verifyFormat("int!;");
+  verifyFormat("long!;");
+  verifyFormat("register!;");
+  verifyFormat("restrict!;");
+  verifyFormat("sizeof!;");
+  verifyFormat("struct!;");
+  verifyFormat("typedef!;");
+  verifyFormat("union!;");
+  verifyFormat("unsigned!;");
+  verifyFormat("volatile!;");
+  verifyFormat("_Alignas!;");
+  verifyFormat("_Alignof!;");
+  verifyFormat("_Atomic!;");
+  verifyFormat("_Bool!;");
+  verifyFormat("_Complex!;");
+  verifyFormat("_Generic!;");
+  verifyFormat("_Imaginary!;");
+  verifyFormat("_Noreturn!;");
+  verifyFormat("_Static_assert!;");
+  verifyFormat("_Thread_local!;");
+  verifyFormat("__func__!;");
+  verifyFormat("__objc_yes!;");
+  verifyFormat("__objc_no!;");
+  verifyFormat("asm!;");
+  verifyFormat("bool!;");
+  verifyFormat("const_cast!;");
+  verifyFormat("dynamic_cast!;");
+  verifyFormat("explicit!;");
+  verifyFormat("friend!;");
+  verifyFormat("mutable!;");
+  verifyFormat("operator!;");
+  verifyFormat("reinterpret_cast!;");
+  verifyFormat("static_cast!;");
+  verifyFormat("template!;");
+  verifyFormat("typename!;");
+  verifyFormat("typeid!;");
+  verifyFormat("using!;");
+  verifyFormat("virtual!;");
+  verifyFormat("wchar_t!;");
+}
+
 TEST_F(FormatTestJS, NullPropagatingOperator) {
   verifyFormat("let x = foo?.bar?.baz();\n");
   verifyFormat("let x = foo?.(foo);\n");