Index: docs/ClangFormat.rst =================================================================== --- docs/ClangFormat.rst +++ docs/ClangFormat.rst @@ -11,12 +11,12 @@ =============== :program:`clang-format` is located in `clang/tools/clang-format` and can be used -to format C/C++/Java/JavaScript/Objective-C/Protobuf code. +to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code. .. code-block:: console $ clang-format -help - OVERVIEW: A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf code. + OVERVIEW: A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code. If no arguments are specified, it formats the code from standard input and writes the result to the standard output. Index: docs/ClangFormatStyleOptions.rst =================================================================== --- docs/ClangFormatStyleOptions.rst +++ docs/ClangFormatStyleOptions.rst @@ -68,6 +68,10 @@ Language: Proto # Don't format .proto files. DisableFormat: true + --- + Language: CSharp + # Use 100 columns for C#. + ColumnLimit: 100 ... An easy way to get a valid ``.clang-format`` file containing all configuration @@ -1496,6 +1500,9 @@ * ``LK_Cpp`` (in configuration: ``Cpp``) Should be used for C, C++. + * ``LK_CSharp`` (in configuration: ``CSharp``) + Should be used for C#. + * ``LK_Java`` (in configuration: ``Java``) Should be used for Java. Index: docs/ReleaseNotes.rst =================================================================== --- docs/ReleaseNotes.rst +++ docs/ReleaseNotes.rst @@ -158,7 +158,7 @@ ------------ -- ... 
+- Add language support for clang-formatting C# files libclang -------- Index: include/clang/Basic/LangOptions.def =================================================================== --- include/clang/Basic/LangOptions.def +++ include/clang/Basic/LangOptions.def @@ -92,6 +92,8 @@ LANGOPT(CPlusPlus17 , 1, 0, "C++17") LANGOPT(CPlusPlus2a , 1, 0, "C++2a") LANGOPT(ObjC , 1, 0, "Objective-C") +LANGOPT(CSharp , 1, 0, "C#") + BENIGN_LANGOPT(ObjCDefaultSynthProperties , 1, 0, "Objective-C auto-synthesized properties") BENIGN_LANGOPT(EncodeExtendedBlockSig , 1, 0, Index: include/clang/Basic/TokenKinds.h =================================================================== --- include/clang/Basic/TokenKinds.h +++ include/clang/Basic/TokenKinds.h @@ -77,7 +77,7 @@ inline bool isStringLiteral(TokenKind K) { return K == tok::string_literal || K == tok::wide_string_literal || K == tok::utf8_string_literal || K == tok::utf16_string_literal || - K == tok::utf32_string_literal; + K == tok::utf32_string_literal || K == tok::verbatim_string_literal; } /// Return true if this is a "literal" kind, like a numeric @@ -86,7 +86,8 @@ return K == tok::numeric_constant || K == tok::char_constant || K == tok::wide_char_constant || K == tok::utf8_char_constant || K == tok::utf16_char_constant || K == tok::utf32_char_constant || - isStringLiteral(K) || K == tok::angle_string_literal; + isStringLiteral(K) || K == tok::angle_string_literal || + K == tok::verbatim_string_literal; } /// Return true if this is any of tok::annot_* kinds. Index: include/clang/Basic/TokenKinds.def =================================================================== --- include/clang/Basic/TokenKinds.def +++ include/clang/Basic/TokenKinds.def @@ -218,6 +218,10 @@ // Objective C support. PUNCTUATOR(at, "@") +// C# support +PUNCTUATOR(dollar, "$") +TOK(verbatim_string_literal) // @"foo" + // CUDA support. 
PUNCTUATOR(lesslessless, "<<<") PUNCTUATOR(greatergreatergreater, ">>>") Index: include/clang/Format/Format.h =================================================================== --- include/clang/Format/Format.h +++ include/clang/Format/Format.h @@ -354,38 +354,38 @@ /// Different ways to break after the template declaration. enum BreakTemplateDeclarationsStyle { - /// Do not force break before declaration. - /// ``PenaltyBreakTemplateDeclaration`` is taken into account. - /// \code - /// template T foo() { - /// } - /// template T foo(int aaaaaaaaaaaaaaaaaaaaa, - /// int bbbbbbbbbbbbbbbbbbbbb) { - /// } - /// \endcode - BTDS_No, - /// Force break after template declaration only when the following - /// declaration spans multiple lines. - /// \code - /// template T foo() { - /// } - /// template - /// T foo(int aaaaaaaaaaaaaaaaaaaaa, - /// int bbbbbbbbbbbbbbbbbbbbb) { - /// } - /// \endcode - BTDS_MultiLine, - /// Always break after template declaration. - /// \code - /// template - /// T foo() { - /// } - /// template - /// T foo(int aaaaaaaaaaaaaaaaaaaaa, - /// int bbbbbbbbbbbbbbbbbbbbb) { - /// } - /// \endcode - BTDS_Yes + /// Do not force break before declaration. + /// ``PenaltyBreakTemplateDeclaration`` is taken into account. + /// \code + /// template T foo() { + /// } + /// template T foo(int aaaaaaaaaaaaaaaaaaaaa, + /// int bbbbbbbbbbbbbbbbbbbbb) { + /// } + /// \endcode + BTDS_No, + /// Force break after template declaration only when the following + /// declaration spans multiple lines. + /// \code + /// template T foo() { + /// } + /// template + /// T foo(int aaaaaaaaaaaaaaaaaaaaa, + /// int bbbbbbbbbbbbbbbbbbbbb) { + /// } + /// \endcode + BTDS_MultiLine, + /// Always break after template declaration. + /// \code + /// template + /// T foo() { + /// } + /// template + /// T foo(int aaaaaaaaaaaaaaaaaaaaa, + /// int bbbbbbbbbbbbbbbbbbbbb) { + /// } + /// \endcode + BTDS_Yes }; /// The template declaration breaking style to use. 
@@ -1219,6 +1219,8 @@ LK_None, /// Should be used for C, C++. LK_Cpp, + /// Should be used for C#. + LK_CSharp, /// Should be used for Java. LK_Java, /// Should be used for JavaScript. @@ -1235,6 +1237,7 @@ LK_TextProto }; bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; } + bool isCSharp() const { return Language == LK_CSharp; } /// Language, this format style is targeted at. LanguageKind Language; @@ -2050,6 +2053,8 @@ switch (Language) { case FormatStyle::LK_Cpp: return "C++"; + case FormatStyle::LK_CSharp: + return "CSharp"; case FormatStyle::LK_ObjC: return "Objective-C"; case FormatStyle::LK_Java: @@ -2071,6 +2076,6 @@ namespace std { template <> struct is_error_code_enum : std::true_type {}; -} +} // end namespace std #endif // LLVM_CLANG_FORMAT_FORMAT_H Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -419,7 +419,7 @@ if (Style.AlwaysBreakBeforeMultilineStrings && (NewLineColumn == State.FirstIndent + Style.ContinuationIndentWidth || Previous.is(tok::comma) || Current.NestingLevel < 2) && - !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at) && + !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at, tok::dollar) && !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) && nextIsMultilineString(State)) return true; @@ -1159,6 +1159,8 @@ if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0) State.StartOfStringLiteral = State.Column + 1; + if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) + State.StartOfStringLiteral = State.Column + 1; else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) State.StartOfStringLiteral = State.Column; else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && @@ -1712,12 +1714,12 @@ const FormatToken &Current, LineState &State, bool AllowBreak) { unsigned StartColumn = State.Column 
- Current.ColumnWidth; if (Current.isStringLiteral()) { - // FIXME: String literal breaking is currently disabled for Java and JS, as - // it requires strings to be merged using "+" which we don't support. + // FIXME: String literal breaking is currently disabled for C#,Java and + // JavaScript, as it requires strings to be merged using "+" which we + // don't support. if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript || - !Style.BreakStringLiterals || - !AllowBreak) + Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp() || + !Style.BreakStringLiterals || !AllowBreak) return nullptr; // Don't break string literals inside preprocessor directives (except for Index: lib/Format/Format.cpp =================================================================== --- lib/Format/Format.cpp +++ lib/Format/Format.cpp @@ -61,6 +61,7 @@ IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen); IO.enumCase(Value, "TextProto", FormatStyle::LK_TextProto); + IO.enumCase(Value, "CSharp", FormatStyle::LK_CSharp); } }; @@ -2084,7 +2085,6 @@ if (HeaderInsertions.empty() && HeadersToDelete.empty()) return Replaces; - StringRef FileName = Replaces.begin()->getFilePath(); tooling::HeaderIncludes Includes(FileName, Code, Style.IncludeStyle); @@ -2275,6 +2275,7 @@ LangOpts.ObjC = 1; LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. LangOpts.DeclSpecKeyword = 1; // To get __declspec. 
+ LangOpts.CSharp = Style.isCSharp(); return LangOpts; } @@ -2306,6 +2307,8 @@ return FormatStyle::LK_TextProto; if (FileName.endswith_lower(".td")) return FormatStyle::LK_TableGen; + if (FileName.endswith_lower(".cs")) + return FormatStyle::LK_CSharp; return FormatStyle::LK_Cpp; } Index: lib/Format/FormatToken.h =================================================================== --- lib/Format/FormatToken.h +++ lib/Format/FormatToken.h @@ -95,6 +95,7 @@ TYPE(TrailingReturnArrow) \ TYPE(TrailingUnaryOperator) \ TYPE(UnaryOperator) \ + TYPE(CSharpStringLiteral) \ TYPE(Unknown) enum TokenType { @@ -358,11 +359,10 @@ bool isSimpleTypeSpecifier() const; bool isObjCAccessSpecifier() const { - return is(tok::at) && Next && - (Next->isObjCAtKeyword(tok::objc_public) || - Next->isObjCAtKeyword(tok::objc_protected) || - Next->isObjCAtKeyword(tok::objc_package) || - Next->isObjCAtKeyword(tok::objc_private)); + return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) || + Next->isObjCAtKeyword(tok::objc_protected) || + Next->isObjCAtKeyword(tok::objc_package) || + Next->isObjCAtKeyword(tok::objc_private)); } /// Returns whether \p Tok is ([{ or an opening < of a template or in @@ -489,8 +489,7 @@ bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { if (is(TT_TemplateString) && opensScope()) return true; - return is(TT_ArrayInitializerLSquare) || - is(TT_ProtoExtensionLSquare) || + return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || (is(tok::l_brace) && (BlockKind == BK_Block || is(TT_DictLiteral) || (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || @@ -506,8 +505,9 @@ const FormatToken *T = this; do { T = T->getPreviousNonComment(); - } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, - tok::ampamp)); + } while ( + T && + T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp)); return T && T->is(tok::kw_auto); } @@ -723,7 +723,35 @@ kw_slots = &IdentTable.get("slots"); kw_qslots = 
&IdentTable.get("Q_SLOTS"); - // Keep this at the end of the constructor to make sure everything here is + // C# keywords + kw_base = &IdentTable.get("base"); + kw_byte = &IdentTable.get("byte"); + kw_checked = &IdentTable.get("checked"); + kw_decimal = &IdentTable.get("decimal"); + kw_delegate = &IdentTable.get("delegate"); + kw_event = &IdentTable.get("event"); + kw_fixed = &IdentTable.get("fixed"); + kw_foreach = &IdentTable.get("foreach"); + kw_implicit = &IdentTable.get("implicit"); + kw_internal = &IdentTable.get("internal"); + kw_lock = &IdentTable.get("lock"); + kw_null = &IdentTable.get("null"); + kw_object = &IdentTable.get("object"); + kw_out = &IdentTable.get("out"); + kw_params = &IdentTable.get("params"); + kw_ref = &IdentTable.get("ref"); + kw_string = &IdentTable.get("string"); + kw_stackalloc = &IdentTable.get("stackalloc"); + kw_sbyte = &IdentTable.get("sbyte"); + kw_sealed = &IdentTable.get("sealed"); + kw_uint = &IdentTable.get("uint"); + kw_ulong = &IdentTable.get("ulong"); + kw_unchecked = &IdentTable.get("unchecked"); + kw_unsafe = &IdentTable.get("unsafe"); + kw_ushort = &IdentTable.get("ushort"); + + // Keep this at the end of the constructor to make sure everything here + // is // already initialized. JsExtraKeywords = std::unordered_set( {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, @@ -731,6 +759,19 @@ kw_set, kw_type, kw_typeof, kw_var, kw_yield, // Keywords from the Java section. kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); + + CSharpExtraKeywords = std::unordered_set( + {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event, + kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal, + kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params, + kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed, + kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, + // Keywords from the JavaScript section. 
+         kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
+         kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
+         kw_set, kw_type, kw_typeof, kw_var, kw_yield,
+         // Keywords from the Java section.
+         kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
   }
 
   // Context sensitive keywords.
@@ -796,6 +837,36 @@
   IdentifierInfo *kw_slots;
   IdentifierInfo *kw_qslots;
 
+  // C# keywords
+  IdentifierInfo *kw_base;
+  IdentifierInfo *kw_byte;
+  IdentifierInfo *kw_checked;
+  IdentifierInfo *kw_decimal;
+  IdentifierInfo *kw_delegate;
+  IdentifierInfo *kw_event;
+  IdentifierInfo *kw_fixed;
+  IdentifierInfo *kw_foreach;
+  IdentifierInfo *kw_implicit;
+  IdentifierInfo *kw_internal;
+
+  IdentifierInfo *kw_lock;
+  IdentifierInfo *kw_null;
+  IdentifierInfo *kw_object;
+  IdentifierInfo *kw_out;
+
+  IdentifierInfo *kw_params;
+
+  IdentifierInfo *kw_ref;
+  IdentifierInfo *kw_string;
+  IdentifierInfo *kw_stackalloc;
+  IdentifierInfo *kw_sbyte;
+  IdentifierInfo *kw_sealed;
+  IdentifierInfo *kw_uint;
+  IdentifierInfo *kw_ulong;
+  IdentifierInfo *kw_unchecked;
+  IdentifierInfo *kw_unsafe;
+  IdentifierInfo *kw_ushort;
+
   /// Returns \c true if \p Tok is a true JavaScript identifier, returns
   /// \c false if it is a keyword or a pseudo keyword.
   bool IsJavaScriptIdentifier(const FormatToken &Tok) const {
@@ -804,9 +875,68 @@
            JsExtraKeywords.end();
   }
 
+  /// Returns \c true if \p Tok is a C# keyword (a C++ keyword token shared
+  /// with C#, or an identifier in CSharpExtraKeywords), \c false otherwise.
+  bool isCSharpKeyword(const FormatToken &Tok) const {
+    switch (Tok.Tok.getKind()) {
+    case tok::kw_bool:
+    case tok::kw_break:
+    case tok::kw_case:
+    case tok::kw_catch:
+    case tok::kw_char:
+    case tok::kw_class:
+    case tok::kw_const:
+    case tok::kw_continue:
+    case tok::kw_default:
+    case tok::kw_do:
+    case tok::kw_double:
+    case tok::kw_else:
+    case tok::kw_enum:
+    case tok::kw_explicit:
+    case tok::kw_extern:
+    case tok::kw_false:
+    case tok::kw_float:
+    case tok::kw_for:
+    case tok::kw_goto:
+    case tok::kw_if:
+    case tok::kw_int:
+    case tok::kw_long:
+    case tok::kw_namespace:
+    case tok::kw_new:
+    case tok::kw_operator:
+    case tok::kw_private:
+    case tok::kw_protected:
+    case tok::kw_public:
+    case tok::kw_return:
+    case tok::kw_short:
+    case tok::kw_sizeof:
+    case tok::kw_static:
+    case tok::kw_struct:
+    case tok::kw_switch:
+    case tok::kw_this:
+    case tok::kw_throw:
+    case tok::kw_true:
+    case tok::kw_try:
+    case tok::kw_typeof:
+    case tok::kw_using:
+    case tok::kw_virtual:
+    case tok::kw_void:
+    case tok::kw_volatile:
+    case tok::kw_while:
+      return true;
+    default: // Not a shared C++ keyword token: a keyword only if in the C# set.
+      return Tok.is(tok::identifier) &&
+             CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) !=
+                 CSharpExtraKeywords.end();
+    }
+  }
+
 private:
   /// The JavaScript keywords beyond the C++ keyword set.
std::unordered_set JsExtraKeywords; + + /// The C# keywords beyond the C++ keyword set + std::unordered_set CSharpExtraKeywords; }; } // namespace format Index: lib/Format/FormatTokenLexer.h =================================================================== --- lib/Format/FormatTokenLexer.h +++ lib/Format/FormatTokenLexer.h @@ -20,8 +20,8 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" -#include "llvm/Support/Regex.h" #include "llvm/ADT/MapVector.h" +#include "llvm/Support/Regex.h" #include @@ -48,6 +48,9 @@ bool tryMergeLessLess(); bool tryMergeNSStringLiteral(); + bool tryMergeCSharpStringLiteral(); + bool tryMergeCSharpKeywordVariables(); + bool tryMergeCSharpNullConditionals(); bool tryMergeTokens(ArrayRef Kinds, TokenType NewType); Index: lib/Format/FormatTokenLexer.cpp =================================================================== --- lib/Format/FormatTokenLexer.cpp +++ lib/Format/FormatTokenLexer.cpp @@ -66,6 +66,19 @@ return; if (tryMergeLessLess()) return; + + if (Style.isCSharp()) { + if (tryMergeCSharpStringLiteral()) + return; + if (tryMergeCSharpKeywordVariables()) + return; + if (tryMergeCSharpNullConditionals()) + return; + static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater}; + if (tryMergeTokens(JSRightArrow, TT_JsFatArrow)) + return; + } + if (tryMergeNSStringLiteral()) return; @@ -110,7 +123,8 @@ return false; auto &At = *(Tokens.end() - 2); auto &String = *(Tokens.end() - 1); - if (!At->is(tok::at) || !String->is(tok::string_literal)) + if (!At->is(tok::at) || + !String->isOneOf(tok::string_literal, tok::verbatim_string_literal)) return false; At->Tok.setKind(tok::string_literal); At->TokenText = StringRef(At->TokenText.begin(), @@ -121,6 +135,82 @@ return true; } +// Search for interpolated string literals $"aaaaa{abc}aaaaa" and mark token +// as TT_CSharpStringLiteral, and to prevent splitting of $ and ". 
+bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
+  if (Tokens.size() < 2)
+    return false;
+  auto &At = *(Tokens.end() - 2);
+  auto &String = *(Tokens.end() - 1);
+
+  // Look for $"aaaaaa" @"aaaaaa": a '$' or '@' directly before a string.
+  if (!(At->is(tok::at) || At->TokenText == "$") ||
+      !String->isOneOf(tok::string_literal, tok::verbatim_string_literal))
+    return false;
+
+  // Look for $@"aaaaaa"; this needs a third token in front of the '@'.
+  if (Tokens.size() > 2 && At->is(tok::at)) {
+    auto &Dollar = *(Tokens.end() - 3);
+    if (Dollar->TokenText == "$") {
+      // This looks like $@"aaaaa", so fold all 3 tokens into the '$' token.
+      Dollar->Tok.setKind(tok::string_literal);
+      Dollar->TokenText =
+          StringRef(Dollar->TokenText.begin(),
+                    String->TokenText.end() - Dollar->TokenText.begin());
+      Dollar->ColumnWidth += (At->ColumnWidth + String->ColumnWidth);
+      Dollar->Type = TT_CSharpStringLiteral;
+      Tokens.erase(Tokens.end() - 2);
+      return true;
+    }
+  }
+
+  // Two-token case: fold the string into the prefix token as a string_literal.
+  At->Tok.setKind(tok::string_literal);
+  At->TokenText = StringRef(At->TokenText.begin(),
+                            String->TokenText.end() - At->TokenText.begin());
+  At->ColumnWidth += String->ColumnWidth;
+  At->Type = TT_CSharpStringLiteral;
+  Tokens.erase(Tokens.end() - 1);
+  return true;
+}
+
+bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {
+  if (Tokens.size() < 2)
+    return false;
+  auto &At = *(Tokens.end() - 2);
+  auto &Keyword = *(Tokens.end() - 1);
+  if (!At->is(tok::at))
+    return false;
+  if (!Keywords.isCSharpKeyword(*Keyword))
+    return false;
+
+  At->Tok.setKind(tok::identifier);
+  At->TokenText = StringRef(At->TokenText.begin(),
+                            Keyword->TokenText.end() - At->TokenText.begin());
+  At->ColumnWidth += Keyword->ColumnWidth;
+  At->Type = Keyword->Type;
+  Tokens.erase(Tokens.end() - 1);
+  return true;
+}
+
+// in C# merge the Identifier and the ? together arg?
+bool FormatTokenLexer::tryMergeCSharpNullConditionals() {
+  if (Tokens.size() < 2)
+    return false;
+  auto &Identifier = *(Tokens.end() - 2);
+  auto &Question = *(Tokens.end() - 1);
+  if (!Identifier->isOneOf(tok::r_square, tok::identifier) ||
+      !Question->is(tok::question))
+    return false;
+  Identifier->TokenText =
+      StringRef(Identifier->TokenText.begin(),
+                Question->TokenText.end() - Identifier->TokenText.begin());
+  Identifier->ColumnWidth += Question->ColumnWidth;
+  // The merged token keeps the kind and type of the leading token.
+  Tokens.erase(Tokens.end() - 1);
+  return true;
+}
+
 bool FormatTokenLexer::tryMergeLessLess() {
   // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
   if (Tokens.size() < 3)
Index: lib/Format/TokenAnnotator.cpp
===================================================================
--- lib/Format/TokenAnnotator.cpp
+++ lib/Format/TokenAnnotator.cpp
@@ -298,6 +298,8 @@
           CurrentToken->Type = TT_JavaAnnotation;
         if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
           CurrentToken->Type = TT_LeadingJavaAnnotation;
+        if (Left->Previous && Left->Previous->is(TT_AttributeSquare))
+          CurrentToken->Type = TT_AttributeSquare;
 
         if (!HasMultipleLines)
           Left->PackingKind = PPK_Inconclusive;
@@ -348,6 +350,37 @@
     return false;
   }
 
+  bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
+    if (!Style.isCSharp())
+      return false;
+
+    const FormatToken *AttrTok = Tok.Next;
+    if (!AttrTok)
+      return false;
+
+    // Just an empty declaration e.g. string [].
+    if (AttrTok->is(tok::r_square))
+      return false;
+
+    // Move along the tokens in between the '[' and ']' e.g. [STAThread]
+    while (AttrTok && AttrTok->isNot(tok::r_square)) {
+      AttrTok = AttrTok->Next;
+    }
+
+    // Move past the ']'; AttrTok is already null if no ']' was found.
+    AttrTok = AttrTok ? AttrTok->Next : nullptr;
+    if (!AttrTok)
+      return false;
+
+    // Only an attribute if a declaration specifier or another '[' follows.
+    if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
+                         tok::kw_class, tok::kw_static, tok::l_square,
+                         Keywords.kw_internal)) {
+      return true;
+    }
+    return false;
+  }
+
   bool isCpp11AttributeSpecifier(const FormatToken &Tok) {
     if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square))
       return false;
@@ -398,6 +431,11 @@
     bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) ||
                                      Contexts.back().InCpp11AttributeSpecifier;
 
+    // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
+    bool IsCSharp11AttributeSpecifier =
+        isCSharpAttributeSpecifier(*Left) ||
+        Contexts.back().InCSharpAttributeSpecifier;
+
     bool InsideInlineASM = Line.startsWith(tok::kw_asm);
     bool StartsObjCMethodExpr =
         !InsideInlineASM && !CppArrayTemplates && Style.isCpp() &&
@@ -475,6 +513,8 @@
                                  // Should only be relevant to JavaScript:
                                  tok::kw_default)) {
         Left->Type = TT_ArrayInitializerLSquare;
+      } else if (IsCSharp11AttributeSpecifier) {
+        Left->Type = TT_AttributeSquare;
       } else {
         BindingIncrease = 10;
         Left->Type = TT_ArraySubscriptLSquare;
@@ -489,11 +529,14 @@
 
     Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
     Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
+    Contexts.back().InCSharpAttributeSpecifier = IsCSharp11AttributeSpecifier;
 
     while (CurrentToken) {
       if (CurrentToken->is(tok::r_square)) {
         if (IsCpp11AttributeSpecifier)
           CurrentToken->Type = TT_AttributeSquare;
+        else if (IsCSharp11AttributeSpecifier)
+          CurrentToken->Type = TT_AttributeSquare;
         else if (((CurrentToken->Next &&
                    CurrentToken->Next->is(tok::l_paren)) ||
                   (CurrentToken->Previous &&
@@ -1190,6 +1233,7 @@
     bool CaretFound = false;
     bool IsForEachMacro = false;
     bool InCpp11AttributeSpecifier = false;
+    bool 
InCSharpAttributeSpecifier = false; }; /// Puts a new \c Context onto the stack \c Contexts for the lifetime @@ -2620,7 +2664,7 @@ // and "%d %d" if (Left.is(tok::numeric_constant) && Right.is(tok::percent)) return Right.WhitespaceRange.getEnd() != Right.WhitespaceRange.getBegin(); - } else if (Style.Language == FormatStyle::LK_JavaScript) { + } else if (Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) { if (Left.is(TT_JsFatArrow)) return true; // for await ( ... @@ -2967,6 +3011,14 @@ if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine) return true; + // Put multiple C# attributes on a new line. + if (Style.isCSharp() && + ((Left.is(TT_AttributeSquare) && Left.is(tok::r_square)) || + (Left.is(tok::r_square) && Right.is(TT_AttributeSquare) && + Right.is(tok::l_square)))) + return true; + + // Put multiple Java annotation on a new line. if ((Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript) && Left.is(TT_LeadingJavaAnnotation) && Index: lib/Format/UnwrappedLineFormatter.cpp =================================================================== --- lib/Format/UnwrappedLineFormatter.cpp +++ lib/Format/UnwrappedLineFormatter.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "NamespaceEndCommentsFixer.h" #include "UnwrappedLineFormatter.h" +#include "NamespaceEndCommentsFixer.h" #include "WhitespaceManager.h" #include "llvm/Support/Debug.h" #include @@ -94,7 +94,7 @@ /// characters to the left from their level. 
int getIndentOffset(const FormatToken &RootToken) { if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) + Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) return 0; if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier() || @@ -232,8 +232,9 @@ if (Tok && Tok->is(tok::kw_typedef)) Tok = Tok->getNextNonComment(); - if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union, - tok::kw_extern, Keywords.kw_interface)) + if (Tok && + Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union, + tok::kw_extern, Keywords.kw_interface)) return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock ? tryMergeSimpleBlock(I, E, Limit) : 0; @@ -690,10 +691,8 @@ /// Formats an \c AnnotatedLine and returns the penalty. /// /// If \p DryRun is \c false, directly applies the changes. - virtual unsigned formatLine(const AnnotatedLine &Line, - unsigned FirstIndent, - unsigned FirstStartColumn, - bool DryRun) = 0; + virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + unsigned FirstStartColumn, bool DryRun) = 0; protected: /// If the \p State's next token is an r_brace closing a nested block, @@ -916,8 +915,8 @@ Penalty = Queue.top().first.first; StateNode *Node = Queue.top().second; if (!Node->State.NextToken) { - LLVM_DEBUG(llvm::dbgs() - << "\n---\nPenalty for line: " << Penalty << "\n"); + LLVM_DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty + << "\n"); break; } Queue.pop(); @@ -949,8 +948,8 @@ if (!DryRun) reconstructPath(InitialState, Queue.top().second); - LLVM_DEBUG(llvm::dbgs() - << "Total number of analyzed states: " << Count << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count + << "\n"); LLVM_DEBUG(llvm::dbgs() << "---\n"); return Penalty; @@ -1008,13 +1007,10 @@ } // anonymous namespace -unsigned -UnwrappedLineFormatter::format(const SmallVectorImpl &Lines, - bool DryRun, int AdditionalIndent, - bool FixBadIndentation, 
- unsigned FirstStartColumn, - unsigned NextStartColumn, - unsigned LastStartColumn) { +unsigned UnwrappedLineFormatter::format( + const SmallVectorImpl &Lines, bool DryRun, + int AdditionalIndent, bool FixBadIndentation, unsigned FirstStartColumn, + unsigned NextStartColumn, unsigned LastStartColumn) { LineJoiner Joiner(Style, Keywords, Lines); // Try to look up already computed penalty in DryRun-mode. @@ -1035,8 +1031,8 @@ unsigned RangeMinLevel = UINT_MAX; bool FirstLine = true; - for (const AnnotatedLine *Line = - Joiner.getNextMergedLine(DryRun, IndentTracker); + for (const AnnotatedLine * + Line = Joiner.getNextMergedLine(DryRun, IndentTracker); Line; Line = NextLine, FirstLine = false) { const AnnotatedLine &TheLine = *Line; unsigned Indent = IndentTracker.getIndent(); @@ -1076,7 +1072,9 @@ TheLine.Last->TotalLength + Indent <= ColumnLimit || (TheLine.Type == LT_ImportStatement && (Style.Language != FormatStyle::LK_JavaScript || - !Style.JavaScriptWrapImports)); + !Style.JavaScriptWrapImports)) || + (Style.isCSharp() && + TheLine.InPPDirective); // don't split #regions in C# if (Style.ColumnLimit == 0) NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) .formatLine(TheLine, NextStartColumn + Indent, Index: lib/Format/UnwrappedLineParser.cpp =================================================================== --- lib/Format/UnwrappedLineParser.cpp +++ lib/Format/UnwrappedLineParser.cpp @@ -999,7 +999,8 @@ case tok::kw_protected: case tok::kw_private: if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) + Style.Language == FormatStyle::LK_JavaScript || + Style.isCSharp()) nextToken(); else parseAccessSpecifier(); @@ -1213,9 +1214,10 @@ // parseRecord falls through and does not yet add an unwrapped line as a // record declaration or definition can start a structural element. parseRecord(); - // This does not apply for Java and JavaScript. + // This does not apply for Java, JavaScript and C#. 
if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) { + Style.Language == FormatStyle::LK_JavaScript || + Style.isCSharp()) { if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); Index: lib/Lex/Lexer.cpp =================================================================== --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -1911,7 +1911,7 @@ while (C != '"') { // Skip escaped characters. Escaped newlines will already be processed by // getAndAdvanceChar. - if (C == '\\') + if (C == '\\' && Kind != tok::verbatim_string_literal) C = getAndAdvanceChar(CurPtr, Result); if (C == '\n' || C == '\r' || // Newline. @@ -3841,9 +3841,16 @@ } break; - case '@': - // Objective C support. - if (CurPtr[-1] == '@' && LangOpts.ObjC) + case '@': // Objective C support or verbatim string literal (C#) + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + Char = getCharAndSize(CurPtr, SizeTmp); + + // verbatim string literal (C#) . 
+ if (Char == '"' && LangOpts.CSharp) + return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), + tok::verbatim_string_literal); + else if (CurPtr[-1] == '@' && LangOpts.ObjC) Kind = tok::at; else Kind = tok::unknown; Index: lib/Lex/LiteralSupport.cpp =================================================================== --- lib/Lex/LiteralSupport.cpp +++ lib/Lex/LiteralSupport.cpp @@ -42,6 +42,7 @@ case tok::string_literal: case tok::utf8_char_constant: case tok::utf8_string_literal: + case tok::verbatim_string_literal: return Target.getCharWidth(); case tok::wide_char_constant: case tok::wide_string_literal: Index: lib/Lex/TokenConcatenation.cpp =================================================================== --- lib/Lex/TokenConcatenation.cpp +++ lib/Lex/TokenConcatenation.cpp @@ -214,6 +214,7 @@ llvm_unreachable("tok::raw_identifier in non-raw lexing mode!"); case tok::string_literal: + case tok::verbatim_string_literal: case tok::wide_string_literal: case tok::utf8_string_literal: case tok::utf16_string_literal: @@ -242,10 +243,11 @@ return GetFirstChar(PP, Tok) != '.'; if (Tok.getIdentifierInfo() || - Tok.isOneOf(tok::wide_string_literal, tok::utf8_string_literal, - tok::utf16_string_literal, tok::utf32_string_literal, - tok::wide_char_constant, tok::utf8_char_constant, - tok::utf16_char_constant, tok::utf32_char_constant)) + Tok.isOneOf(tok::wide_string_literal, tok::verbatim_string_literal, + tok::utf8_string_literal, tok::utf16_string_literal, + tok::utf32_string_literal, tok::wide_char_constant, + tok::utf8_char_constant, tok::utf16_char_constant, + tok::utf32_char_constant)) return true; // If this isn't identifier + string, we're done. 
Index: tools/clang-format/ClangFormat.cpp
===================================================================
--- tools/clang-format/ClangFormat.cpp
+++ tools/clang-format/ClangFormat.cpp
@@ -345,7 +345,7 @@
   cl::SetVersionPrinter(PrintVersion);
   cl::ParseCommandLineOptions(
       argc, argv,
-      "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf code.\n\n"
+      "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n"
       "If no arguments are specified, it formats the code from standard input\n"
       "and writes the result to the standard output.\n"
       "If <file>s are given, it reformats the files. If -i is specified\n"
Index: unittests/Format/CMakeLists.txt
===================================================================
--- unittests/Format/CMakeLists.txt
+++ unittests/Format/CMakeLists.txt
@@ -6,6 +6,7 @@
   CleanupTest.cpp
   FormatTest.cpp
   FormatTestComments.cpp
+  FormatTestCSharp.cpp
   FormatTestJS.cpp
   FormatTestJava.cpp
   FormatTestObjC.cpp
Index: unittests/Format/FormatTestCSharp.cpp
===================================================================
--- /dev/null
+++ unittests/Format/FormatTestCSharp.cpp
@@ -0,0 +1,186 @@
+//===- unittest/Format/FormatTestCSharp.cpp - Formatting tests for CSharp -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FormatTestUtils.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Debug.h"
+#include "gtest/gtest.h"
+
+#define DEBUG_TYPE "format-test"
+
+namespace clang {
+namespace format {
+
+/// Fixture for the C# formatting tests. Every helper defaults to Google style
+/// configured for FormatStyle::LK_CSharp.
+class FormatTestCSharp : public ::testing::Test {
+protected:
+  /// Reformats the range of \p Code given by \p Offset and \p Length with
+  /// \p Style and returns the text with all replacements applied.
+  static std::string format(llvm::StringRef Code, unsigned Offset,
+                            unsigned Length, const FormatStyle &Style) {
+    LLVM_DEBUG(llvm::errs() << "---\n");
+    LLVM_DEBUG(llvm::errs() << Code << "\n\n");
+    std::vector<tooling::Range> Ranges(1, tooling::Range(Offset, Length));
+    tooling::Replacements Replaces = reformat(Style, Code, Ranges);
+    auto Result = applyAllReplacements(Code, Replaces);
+    EXPECT_TRUE(static_cast<bool>(Result));
+    LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n");
+    return *Result;
+  }
+
+  /// Reformats all of \p Code with \p Style.
+  static std::string
+  format(llvm::StringRef Code,
+         const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_CSharp)) {
+    return format(Code, 0, Code.size(), Style);
+  }
+
+  /// Returns the default C# style with ColumnLimit set to \p ColumnLimit.
+  static FormatStyle getStyleWithColumns(unsigned ColumnLimit) {
+    FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp);
+    Style.ColumnLimit = ColumnLimit;
+    return Style;
+  }
+
+  /// Expects that \p Code is left unchanged by formatting and that a
+  /// messed-up copy of it (test::messUp) formats back to \p Code.
+  static void verifyFormat(
+      llvm::StringRef Code,
+      const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_CSharp)) {
+    EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable";
+    EXPECT_EQ(Code.str(), format(test::messUp(Code), Style));
+  }
+};
+
+// A class whose members and control-flow statements fit on single lines.
+TEST_F(FormatTestCSharp, CSharpClass) {
+  verifyFormat("public class SomeClass {\n"
+               "  void f() {}\n"
+               "  int g() { return 0; }\n"
+               "  void h() {\n"
+               "    while (true) f();\n"
+               "    for (;;) f();\n"
+               "    if (true) f();\n"
+               "  }\n"
+               "}");
+}
+
+// Access modifiers, alone and combined with 'override' or 'static'.
+TEST_F(FormatTestCSharp, AccessModifiers) {
+  verifyFormat("public String toString() {}");
+  verifyFormat("private String toString() {}");
+  verifyFormat("protected String toString() {}");
+  verifyFormat("internal String toString() {}");
+
+  verifyFormat("public override String toString() {}");
+  verifyFormat("private override String toString() {}");
+  verifyFormat("protected override String toString() {}");
+  verifyFormat("internal override String toString() {}");
+
+  verifyFormat("internal static String toString() {}");
+}
+
+// The pieces below concatenate into one literal, expected on a single line.
+TEST_F(FormatTestCSharp, NoStringLiteralBreaks) {
+  verifyFormat("foo("
+               "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+               "aaaaaa\");");
+}
+
+TEST_F(FormatTestCSharp, CSharpVerbatiumStringLiterals) {
+  verifyFormat("foo(@\"aaaaaaaa\\abc\\aaaa\");");
+  // @"ABC\" + ToString("B") - handle embedded \ in literal string at
+  // the end
+  verifyFormat("string s = @\"ABC\\\" + ToString(\"B\");");
+}
+
+TEST_F(FormatTestCSharp, CSharpInterpolatedStringLiterals) {
+  verifyFormat("foo($\"aaaaaaaa{aaa}aaaa\");");
+  verifyFormat("foo($\"aaaa{A}\");");
+  verifyFormat(
+      "foo($\"aaaa{A}"
+      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\");");
+  verifyFormat("Name = $\"{firstName} {lastName}\";");
+
+  // $"A{abc}BC" + ToString("B") - an interpolated string followed by more
+  // tokens on the same line
+  verifyFormat("string s = $\"A{abc}BC\" + ToString(\"B\");");
+  verifyFormat("$\"{domain}\\\\{user}\"");
+  verifyFormat(
+      "var verbatimInterpolated = $@\"C:\\Users\\{userName}\\Documents\\\";");
+}
+
+// '=>' in a lambda and in an expression-bodied member.
+TEST_F(FormatTestCSharp, CSharpFatArrows) {
+  verifyFormat("Task serverTask = Task.Run(async() => {");
+  verifyFormat("public override string ToString() => \"{Name}\\{Age}\";");
+}
+
+// '?' in a nullable type ("int?") and in null-conditional access ("?.", "?[").
+TEST_F(FormatTestCSharp, CSharpNullConditional) {
+  verifyFormat(
+      "public Person(string firstName, string lastName, int? age=null)");
+
+  verifyFormat("switch(args?.Length)");
+
+  verifyFormat("public static void Main(string[] args) { string dirPath "
+               "= args?[0]; }");
+}
+
+// Attribute lines are expected to stay on their own lines above declarations.
+TEST_F(FormatTestCSharp, Attributes) {
+  verifyFormat("[STAThread]\n"
+               "static void\n"
+               "Main(string[] args) {}");
+
+  verifyFormat("[TestMethod]\n"
+               "private class Test {}");
+
+  verifyFormat("[TestMethod]\n"
+               "protected class Test {}");
+
+  verifyFormat("[TestMethod]\n"
+               "internal class Test {}");
+
+  verifyFormat("[TestMethod]\n"
+               "class Test {}");
+
+  verifyFormat("[TestMethod]\n"
+               "[DeploymentItem(\"Test.txt\")]\n"
+               "public class Test {}");
+
+  verifyFormat("[System.AttributeUsage(System.AttributeTargets.Method)]\n"
+               "[System.Runtime.InteropServices.ComVisible(true)]\n"
+               "public sealed class STAThreadAttribute : Attribute {}");
+
+  verifyFormat("[Verb(\"start\", HelpText = \"Starts the server listening on "
+               "provided port\")]\n"
+               "class Test {}");
+
+  verifyFormat("[TestMethod]\n"
+               "public string Host {\n  set;\n  get;\n}");
+
+  verifyFormat("[TestMethod(\"start\", HelpText = \"Starts the server "
+               "listening on provided host\")]\n"
+               "public string Host {\n  set;\n  get;\n}");
+}
+
+// A #region directive with a long description is expected on a single line.
+TEST_F(FormatTestCSharp, CSharpRegions) {
+  verifyFormat("#region aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaa "
+               "aaaaaaaaaaaaaaa long region");
+}
+
+// Keywords used as identifiers are escaped with a leading '@'.
+TEST_F(FormatTestCSharp, CSharpKeyWordEscaping) {
+  verifyFormat("public enum var { none, @string, bool, @enum }");
+}
+
+} // namespace format
+} // namespace clang