diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -20,6 +20,7 @@ #include "clang/Format/Format.h" #include "clang/Lex/Lexer.h" #include <memory> +#include <unordered_map> #include <unordered_set> namespace clang { @@ -880,228 +881,129 @@ /// Encapsulates keywords that are context sensitive or for languages not /// properly supported by Clang's lexer. struct AdditionalKeywords { +#define LIST_ADDITIONAL_KEYWORDS \ + /* Context-sensitive ones that appear in C++ or ObjC. The lexer lexes them \ + * as identifiers under our settings. */ \ + KEYWORD(final, ATTR_CSHARP_KEYWORD) \ + KEYWORD(override, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(in, ATTR_CSHARP_KEYWORD) \ + KEYWORD(of, 0) \ + KEYWORD(CF_CLOSED_ENUM, 0) \ + KEYWORD(CF_ENUM, 0) \ + KEYWORD(CF_OPTIONS, 0) \ + KEYWORD(NS_CLOSED_ENUM, 0) \ + KEYWORD(NS_ENUM, 0) \ + KEYWORD(NS_OPTIONS, 0) \ + /* Javascript */ \ + KEYWORD(as, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(async, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(await, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(declare, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(finally, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(from, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(function, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(get, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(import, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(infer, 0) \ + KEYWORD(is, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(let, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(module, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(readonly, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(set, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(type, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(typeof, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(var, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(yield, ATTR_JS_KEYWORD | 
ATTR_CSHARP_KEYWORD) \ + /* Java */ \ + KEYWORD(abstract, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(assert, 0) \ + KEYWORD(extends, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(implements, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(instanceof, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(interface, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(native, 0) \ + KEYWORD(package, 0) \ + KEYWORD(synchronized, 0) \ + KEYWORD(throws, 0) \ + KEYWORD(__except, 0) \ + KEYWORD(__has_include, 0) \ + KEYWORD(__has_include_next, 0) \ + /* Pragma */ \ + KEYWORD(mark, 0) \ + /* Protobuf */ \ + KEYWORD(extend, 0) \ + KEYWORD(option, 0) \ + KEYWORD(optional, 0) \ + KEYWORD(repeated, 0) \ + KEYWORD(required, 0) \ + KEYWORD(returns, 0) \ + /* QT */ \ + KEYWORD(signals, 0) \ + KEYWORD(Q_SIGNALS, 0) \ + KEYWORD(slots, 0) \ + KEYWORD(Q_SLOTS, 0) \ + /* C# */ \ + KEYWORD(dollar, ATTR_CSHARP_KEYWORD) \ + KEYWORD(base, ATTR_CSHARP_KEYWORD) \ + KEYWORD(byte, ATTR_CSHARP_KEYWORD) \ + KEYWORD(checked, ATTR_CSHARP_KEYWORD) \ + KEYWORD(decimal, ATTR_CSHARP_KEYWORD) \ + KEYWORD(delegate, ATTR_CSHARP_KEYWORD) \ + KEYWORD(event, ATTR_CSHARP_KEYWORD) \ + KEYWORD(fixed, ATTR_CSHARP_KEYWORD) \ + KEYWORD(foreach, ATTR_CSHARP_KEYWORD) \ + KEYWORD(implicit, ATTR_CSHARP_KEYWORD) \ + KEYWORD(init, ATTR_CSHARP_KEYWORD) \ + KEYWORD(internal, ATTR_CSHARP_KEYWORD) \ + KEYWORD(lock, ATTR_CSHARP_KEYWORD) \ + KEYWORD(null, ATTR_CSHARP_KEYWORD) \ + KEYWORD(object, ATTR_CSHARP_KEYWORD) \ + KEYWORD(out, ATTR_CSHARP_KEYWORD) \ + KEYWORD(params, ATTR_CSHARP_KEYWORD) \ + KEYWORD(ref, ATTR_CSHARP_KEYWORD) \ + KEYWORD(string, ATTR_CSHARP_KEYWORD) \ + KEYWORD(stackalloc, ATTR_CSHARP_KEYWORD) \ + KEYWORD(sbyte, ATTR_CSHARP_KEYWORD) \ + KEYWORD(sealed, ATTR_CSHARP_KEYWORD) \ + KEYWORD(uint, ATTR_CSHARP_KEYWORD) \ + KEYWORD(ulong, ATTR_CSHARP_KEYWORD) \ + KEYWORD(unchecked, ATTR_CSHARP_KEYWORD) \ + KEYWORD(unsafe, ATTR_CSHARP_KEYWORD) \ + KEYWORD(ushort, ATTR_CSHARP_KEYWORD) \ + KEYWORD(when, 
ATTR_CSHARP_KEYWORD) \ + KEYWORD(where, ATTR_CSHARP_KEYWORD) + AdditionalKeywords(IdentifierTable &IdentTable) { - kw_final = &IdentTable.get("final"); - kw_override = &IdentTable.get("override"); - kw_in = &IdentTable.get("in"); - kw_of = &IdentTable.get("of"); - kw_CF_CLOSED_ENUM = &IdentTable.get("CF_CLOSED_ENUM"); - kw_CF_ENUM = &IdentTable.get("CF_ENUM"); - kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); - kw_NS_CLOSED_ENUM = &IdentTable.get("NS_CLOSED_ENUM"); - kw_NS_ENUM = &IdentTable.get("NS_ENUM"); - kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS"); +#define KEYWORD(WORD, ATTRS) kw_##WORD = &IdentTable.get(#WORD); + LIST_ADDITIONAL_KEYWORDS +#undef KEYWORD - kw_as = &IdentTable.get("as"); - kw_async = &IdentTable.get("async"); - kw_await = &IdentTable.get("await"); - kw_declare = &IdentTable.get("declare"); - kw_finally = &IdentTable.get("finally"); - kw_from = &IdentTable.get("from"); - kw_function = &IdentTable.get("function"); - kw_get = &IdentTable.get("get"); - kw_import = &IdentTable.get("import"); - kw_infer = &IdentTable.get("infer"); - kw_is = &IdentTable.get("is"); - kw_let = &IdentTable.get("let"); - kw_module = &IdentTable.get("module"); - kw_readonly = &IdentTable.get("readonly"); - kw_set = &IdentTable.get("set"); - kw_type = &IdentTable.get("type"); - kw_typeof = &IdentTable.get("typeof"); - kw_var = &IdentTable.get("var"); - kw_yield = &IdentTable.get("yield"); - - kw_abstract = &IdentTable.get("abstract"); - kw_assert = &IdentTable.get("assert"); - kw_extends = &IdentTable.get("extends"); - kw_implements = &IdentTable.get("implements"); - kw_instanceof = &IdentTable.get("instanceof"); - kw_interface = &IdentTable.get("interface"); - kw_native = &IdentTable.get("native"); - kw_package = &IdentTable.get("package"); - kw_synchronized = &IdentTable.get("synchronized"); - kw_throws = &IdentTable.get("throws"); - kw___except = &IdentTable.get("__except"); - kw___has_include = &IdentTable.get("__has_include"); - kw___has_include_next = 
&IdentTable.get("__has_include_next"); - - kw_mark = &IdentTable.get("mark"); - - kw_extend = &IdentTable.get("extend"); - kw_option = &IdentTable.get("option"); - kw_optional = &IdentTable.get("optional"); - kw_repeated = &IdentTable.get("repeated"); - kw_required = &IdentTable.get("required"); - kw_returns = &IdentTable.get("returns"); - - kw_signals = &IdentTable.get("signals"); - kw_qsignals = &IdentTable.get("Q_SIGNALS"); - kw_slots = &IdentTable.get("slots"); - kw_qslots = &IdentTable.get("Q_SLOTS"); - - // For internal clang-format use. kw_internal_ident_after_define = &IdentTable.get("__CLANG_FORMAT_INTERNAL_IDENT_AFTER_DEFINE__"); - // C# keywords - kw_dollar = &IdentTable.get("dollar"); - kw_base = &IdentTable.get("base"); - kw_byte = &IdentTable.get("byte"); - kw_checked = &IdentTable.get("checked"); - kw_decimal = &IdentTable.get("decimal"); - kw_delegate = &IdentTable.get("delegate"); - kw_event = &IdentTable.get("event"); - kw_fixed = &IdentTable.get("fixed"); - kw_foreach = &IdentTable.get("foreach"); - kw_init = &IdentTable.get("init"); - kw_implicit = &IdentTable.get("implicit"); - kw_internal = &IdentTable.get("internal"); - kw_lock = &IdentTable.get("lock"); - kw_null = &IdentTable.get("null"); - kw_object = &IdentTable.get("object"); - kw_out = &IdentTable.get("out"); - kw_params = &IdentTable.get("params"); - kw_ref = &IdentTable.get("ref"); - kw_string = &IdentTable.get("string"); - kw_stackalloc = &IdentTable.get("stackalloc"); - kw_sbyte = &IdentTable.get("sbyte"); - kw_sealed = &IdentTable.get("sealed"); - kw_uint = &IdentTable.get("uint"); - kw_ulong = &IdentTable.get("ulong"); - kw_unchecked = &IdentTable.get("unchecked"); - kw_unsafe = &IdentTable.get("unsafe"); - kw_ushort = &IdentTable.get("ushort"); - kw_when = &IdentTable.get("when"); - kw_where = &IdentTable.get("where"); - - // Keep this at the end of the constructor to make sure everything here - // is - // already initialized. 
- JsExtraKeywords = std::unordered_set<IdentifierInfo *>( - {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, - kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_override, - kw_readonly, kw_set, kw_type, kw_typeof, kw_var, kw_yield, - // Keywords from the Java section. - kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); - - CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>( - {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event, - kw_fixed, kw_foreach, kw_implicit, kw_in, kw_init, kw_interface, - kw_internal, kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, - kw_params, kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, - kw_sealed, kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, - kw_when, kw_where, - // Keywords from the JavaScript section. - kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, - kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, - kw_set, kw_type, kw_typeof, kw_var, kw_yield, - // Keywords from the Java section. - kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); + // Keep this at the end of the constructor to make sure everything + // here is already initialized. +#define KEYWORD(WORD, ATTRS) {kw_##WORD, (ATTRS)}, + KeywordAttr = {LIST_ADDITIONAL_KEYWORDS}; +#undef KEYWORD } - // Context sensitive keywords. - IdentifierInfo *kw_final; - IdentifierInfo *kw_override; - IdentifierInfo *kw_in; - IdentifierInfo *kw_of; - IdentifierInfo *kw_CF_CLOSED_ENUM; - IdentifierInfo *kw_CF_ENUM; - IdentifierInfo *kw_CF_OPTIONS; - IdentifierInfo *kw_NS_CLOSED_ENUM; - IdentifierInfo *kw_NS_ENUM; - IdentifierInfo *kw_NS_OPTIONS; - IdentifierInfo *kw___except; - IdentifierInfo *kw___has_include; - IdentifierInfo *kw___has_include_next; - - // JavaScript keywords. 
- IdentifierInfo *kw_as; - IdentifierInfo *kw_async; - IdentifierInfo *kw_await; - IdentifierInfo *kw_declare; - IdentifierInfo *kw_finally; - IdentifierInfo *kw_from; - IdentifierInfo *kw_function; - IdentifierInfo *kw_get; - IdentifierInfo *kw_import; - IdentifierInfo *kw_infer; - IdentifierInfo *kw_is; - IdentifierInfo *kw_let; - IdentifierInfo *kw_module; - IdentifierInfo *kw_readonly; - IdentifierInfo *kw_set; - IdentifierInfo *kw_type; - IdentifierInfo *kw_typeof; - IdentifierInfo *kw_var; - IdentifierInfo *kw_yield; - - // Java keywords. - IdentifierInfo *kw_abstract; - IdentifierInfo *kw_assert; - IdentifierInfo *kw_extends; - IdentifierInfo *kw_implements; - IdentifierInfo *kw_instanceof; - IdentifierInfo *kw_interface; - IdentifierInfo *kw_native; - IdentifierInfo *kw_package; - IdentifierInfo *kw_synchronized; - IdentifierInfo *kw_throws; - - // Pragma keywords. - IdentifierInfo *kw_mark; - - // Proto keywords. - IdentifierInfo *kw_extend; - IdentifierInfo *kw_option; - IdentifierInfo *kw_optional; - IdentifierInfo *kw_repeated; - IdentifierInfo *kw_required; - IdentifierInfo *kw_returns; + enum { + ATTR_JS_KEYWORD = 0x1, + ATTR_CSHARP_KEYWORD = 0x2, + }; + unsigned getAttrs(const FormatToken &Tok) const { + auto At = KeywordAttr.find(Tok.Tok.getIdentifierInfo()); + return At == KeywordAttr.end() ? 0u : At->second; + } - // QT keywords. - IdentifierInfo *kw_signals; - IdentifierInfo *kw_qsignals; - IdentifierInfo *kw_slots; - IdentifierInfo *kw_qslots; +#define KEYWORD(WORD, ATTRS) IdentifierInfo *kw_##WORD; + LIST_ADDITIONAL_KEYWORDS +#undef KEYWORD // For internal use by clang-format. 
IdentifierInfo *kw_internal_ident_after_define; - // C# keywords - IdentifierInfo *kw_dollar; - IdentifierInfo *kw_base; - IdentifierInfo *kw_byte; - IdentifierInfo *kw_checked; - IdentifierInfo *kw_decimal; - IdentifierInfo *kw_delegate; - IdentifierInfo *kw_event; - IdentifierInfo *kw_fixed; - IdentifierInfo *kw_foreach; - IdentifierInfo *kw_implicit; - IdentifierInfo *kw_init; - IdentifierInfo *kw_internal; - - IdentifierInfo *kw_lock; - IdentifierInfo *kw_null; - IdentifierInfo *kw_object; - IdentifierInfo *kw_out; - - IdentifierInfo *kw_params; - - IdentifierInfo *kw_ref; - IdentifierInfo *kw_string; - IdentifierInfo *kw_stackalloc; - IdentifierInfo *kw_sbyte; - IdentifierInfo *kw_sealed; - IdentifierInfo *kw_uint; - IdentifierInfo *kw_ulong; - IdentifierInfo *kw_unchecked; - IdentifierInfo *kw_unsafe; - IdentifierInfo *kw_ushort; - IdentifierInfo *kw_when; - IdentifierInfo *kw_where; - /// Returns \c true if \p Tok is a true JavaScript identifier, returns /// \c false if it is a keyword or a pseudo keyword. /// If \c AcceptIdentifierName is true, returns true not only for keywords, @@ -1145,22 +1047,17 @@ case tok::kw_while: // These are JS keywords that are lexed by LLVM/clang as keywords. return false; - case tok::identifier: { - // For identifiers, make sure they are true identifiers, excluding the - // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords). - bool IsPseudoKeyword = - JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) != - JsExtraKeywords.end(); - return AcceptIdentifierName || !IsPseudoKeyword; - } + case tok::identifier: + return AcceptIdentifierName || !(getAttrs(Tok) & ATTR_JS_KEYWORD); default: - // Other keywords are handled in the switch below, to avoid problems due - // to duplicate case labels when using the #include trick. - break; + return isCXXKeyword(Tok); } + } + /// Returns whether \p Tok is a keyword defined in the C++ lexer, that + /// is, not including punctuation. 
+ bool isCXXKeyword(const FormatToken &Tok) const { switch (Tok.Tok.getKind()) { - // Handle C++ keywords not included above: these are all JS identifiers. #define KEYWORD(X, Y) case tok::kw_##X: #include "clang/Basic/TokenKinds.def" // #undef KEYWORD is not needed -- it's #undef-ed at the end of @@ -1221,19 +1118,17 @@ case tok::kw_volatile: case tok::kw_while: return true; + case tok::identifier: + return getAttrs(Tok) & ATTR_CSHARP_KEYWORD; default: - return Tok.is(tok::identifier) && - CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == - CSharpExtraKeywords.end(); + return false; } } private: - /// The JavaScript keywords beyond the C++ keyword set. - std::unordered_set<IdentifierInfo *> JsExtraKeywords; + std::unordered_map<IdentifierInfo *, unsigned> KeywordAttr; - /// The C# keywords beyond the C++ keyword set - std::unordered_set<IdentifierInfo *> CSharpExtraKeywords; +#undef LIST_ADDITIONAL_KEYWORDS }; } // namespace format diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -112,11 +112,12 @@ else if (RootToken.isObjCAccessSpecifier()) return true; // Handle Qt signals. - else if ((RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) && + else if ((RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_Q_SIGNALS) && RootToken.Next && RootToken.Next->is(tok::colon))) return true; else if (RootToken.Next && - RootToken.Next->isOneOf(Keywords.kw_slots, Keywords.kw_qslots) && + RootToken.Next->isOneOf(Keywords.kw_slots, + Keywords.kw_Q_SLOTS) && RootToken.Next->Next && RootToken.Next->Next->is(tok::colon)) return true; // Handle malformed access specifier e.g. 'private' without trailing ':'. 
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1481,8 +1481,8 @@ } } if (Style.isCpp() && - FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, - Keywords.kw_slots, Keywords.kw_qslots)) { + FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_Q_SIGNALS, + Keywords.kw_slots, Keywords.kw_Q_SLOTS)) { nextToken(); if (FormatTok->is(tok::colon)) { nextToken(); @@ -2902,7 +2902,7 @@ FormatToken *AccessSpecifierCandidate = FormatTok; nextToken(); // Understand Qt's slots. - if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) + if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_Q_SLOTS)) nextToken(); // Otherwise, we don't know what it is, and we'd better keep the next token. if (FormatTok->is(tok::colon)) {