Index: docs/ClangFormat.rst =================================================================== --- docs/ClangFormat.rst +++ docs/ClangFormat.rst @@ -11,12 +11,12 @@ =============== :program:`clang-format` is located in `clang/tools/clang-format` and can be used -to format C/C++/Java/JavaScript/Objective-C/Protobuf code. +to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code. .. code-block:: console $ clang-format -help - OVERVIEW: A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf code. + OVERVIEW: A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code. If no arguments are specified, it formats the code from standard input and writes the result to the standard output. Index: docs/ClangFormatStyleOptions.rst =================================================================== --- docs/ClangFormatStyleOptions.rst +++ docs/ClangFormatStyleOptions.rst @@ -7,8 +7,8 @@ When using :program:`clang-format` command line utility or ``clang::format::reformat(...)`` functions from code, one can either use one of -the predefined styles (LLVM, Google, Chromium, Mozilla, WebKit) or create a -custom style by configuring specific style options. +the predefined styles (LLVM, Google, Chromium, Mozilla, WebKit, Microsoft) or +create a custom style by configuring specific style options. Configuring Style with clang-format @@ -68,6 +68,10 @@ Language: Proto # Don't format .proto files. DisableFormat: true + --- + Language: CSharp + # Use 100 columns for C#. + ColumnLimit: 100 ... An easy way to get a valid ``.clang-format`` file containing all configuration @@ -144,6 +148,9 @@ * ``WebKit`` A style complying with `WebKit's style guide `_ + * ``Microsoft`` + A style complying with `Microsoft's style guide + '_ .. START_FORMAT_STYLE_OPTIONS @@ -1496,6 +1503,9 @@ * ``LK_Cpp`` (in configuration: ``Cpp``) Should be used for C, C++. + * ``LK_CSharp`` (in configuration: ``CSharp``) + Should be used for C#. + * ``LK_Java`` (in configuration: ``Java``) Should be used for Java. Index: docs/ReleaseNotes.rst =================================================================== --- docs/ReleaseNotes.rst +++ docs/ReleaseNotes.rst @@ -158,7 +158,8 @@ ------------ -- ... +- Add language support for clang-formatting C# files +- Add Microsoft coding style to encapsulate default C# formatting style libclang -------- Index: include/clang/Basic/LangOptions.def =================================================================== --- include/clang/Basic/LangOptions.def +++ include/clang/Basic/LangOptions.def @@ -92,6 +92,8 @@ LANGOPT(CPlusPlus17 , 1, 0, "C++17") LANGOPT(CPlusPlus2a , 1, 0, "C++2a") LANGOPT(ObjC , 1, 0, "Objective-C") +LANGOPT(CSharp , 1, 0, "C#") + BENIGN_LANGOPT(ObjCDefaultSynthProperties , 1, 0, "Objective-C auto-synthesized properties") BENIGN_LANGOPT(EncodeExtendedBlockSig , 1, 0, Index: include/clang/Basic/TokenKinds.h =================================================================== --- include/clang/Basic/TokenKinds.h +++ include/clang/Basic/TokenKinds.h @@ -77,7 +77,7 @@ inline bool isStringLiteral(TokenKind K) { return K == tok::string_literal || K == tok::wide_string_literal || K == tok::utf8_string_literal || K == tok::utf16_string_literal || - K == tok::utf32_string_literal; + K == tok::utf32_string_literal || K == tok::verbatim_string_literal; } /// Return true if this is a "literal" kind, like a numeric @@ -86,7 +86,8 @@ return K == tok::numeric_constant || K == tok::char_constant || K == tok::wide_char_constant || K == tok::utf8_char_constant || K == tok::utf16_char_constant || K == tok::utf32_char_constant || - isStringLiteral(K) || K == tok::angle_string_literal; + isStringLiteral(K) || K == tok::angle_string_literal || + K == tok::verbatim_string_literal; } /// Return true if this is any of tok::annot_* kinds. Index: include/clang/Basic/TokenKinds.def =================================================================== --- include/clang/Basic/TokenKinds.def +++ include/clang/Basic/TokenKinds.def @@ -221,6 +221,10 @@ // Objective C support. PUNCTUATOR(at, "@") +// C# support +PUNCTUATOR(dollar, "$") +TOK(verbatim_string_literal) // @"foo" + // CUDA support. PUNCTUATOR(lesslessless, "<<<") PUNCTUATOR(greatergreatergreater, ">>>") Index: include/clang/Format/Format.h =================================================================== --- include/clang/Format/Format.h +++ include/clang/Format/Format.h @@ -1219,6 +1219,8 @@ LK_None, /// Should be used for C, C++. LK_Cpp, + /// Should be used for C#. + LK_CSharp, /// Should be used for Java. LK_Java, /// Should be used for JavaScript. @@ -1235,6 +1237,7 @@ LK_TextProto }; bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; } + bool isCSharp() const { return Language == LK_CSharp; } /// Language, this format style is targeted at. LanguageKind Language; @@ -2051,6 +2054,8 @@ switch (Language) { case FormatStyle::LK_Cpp: return "C++"; + case FormatStyle::LK_CSharp: + return "CSharp"; case FormatStyle::LK_ObjC: return "Objective-C"; case FormatStyle::LK_Java: Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -418,7 +418,7 @@ if (Style.AlwaysBreakBeforeMultilineStrings && (NewLineColumn == State.FirstIndent + Style.ContinuationIndentWidth || Previous.is(tok::comma) || Current.NestingLevel < 2) && - !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at) && + !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at, tok::dollar) && !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) && nextIsMultilineString(State)) return true; @@ -1158,6 +1158,8 @@ if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0) State.StartOfStringLiteral = State.Column + 1; + if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) + State.StartOfStringLiteral = State.Column + 1; else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) State.StartOfStringLiteral = State.Column; else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && @@ -1711,10 +1713,11 @@ LineState &State, bool AllowBreak) { unsigned StartColumn = State.Column - Current.ColumnWidth; if (Current.isStringLiteral()) { - // FIXME: String literal breaking is currently disabled for Java and JS, as - // it requires strings to be merged using "+" which we don't support. + // FIXME: String literal breaking is currently disabled for C#,Java and + // JavaScript, as it requires strings to be merged using "+" which we + // don't support. if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript || + Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp() || !Style.BreakStringLiterals || !AllowBreak) return nullptr; Index: lib/Format/Format.cpp =================================================================== --- lib/Format/Format.cpp +++ lib/Format/Format.cpp @@ -61,6 +61,7 @@ IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen); IO.enumCase(Value, "TextProto", FormatStyle::LK_TextProto); + IO.enumCase(Value, "CSharp", FormatStyle::LK_CSharp); } }; @@ -274,8 +275,8 @@ IO.mapOptional("Language", Style.Language); if (IO.outputting()) { - StringRef StylesArray[] = {"LLVM", "Google", "Chromium", - "Mozilla", "WebKit", "GNU"}; + StringRef StylesArray[] = {"LLVM", "Google", "Chromium", "Mozilla", + "WebKit", "GNU", "Microsoft"}; ArrayRef Styles(StylesArray); for (size_t i = 0, e = Styles.size(); i < e; ++i) { StringRef StyleName(Styles[i]); @@ -936,6 +937,32 @@ return Style; } +FormatStyle getMicrosoftStyle(FormatStyle::LanguageKind Language) { + FormatStyle Style = getLLVMStyle(); + Style.ColumnLimit = 120; + Style.TabWidth = 4; + Style.IndentWidth = 4; + Style.UseTab = FormatStyle::UT_Never; + Style.BreakBeforeBraces = FormatStyle::BS_Custom; + Style.BraceWrapping.AfterClass = true; + Style.BraceWrapping.AfterControlStatement = true; + Style.BraceWrapping.AfterEnum = true; + Style.BraceWrapping.AfterFunction = true; + Style.BraceWrapping.AfterNamespace = true; + Style.BraceWrapping.AfterObjCDeclaration = true; + Style.BraceWrapping.AfterStruct = true; + Style.BraceWrapping.AfterExternBlock = true; + Style.BraceWrapping.BeforeCatch = true; + Style.BraceWrapping.BeforeElse = true; + Style.PenaltyReturnTypeOnItsOwnLine = 1000; + Style.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; + Style.AllowShortBlocksOnASingleLine = false; + Style.AllowShortCaseLabelsOnASingleLine = false; + Style.AllowShortIfStatementsOnASingleLine = false; + Style.AllowShortLoopsOnASingleLine = false; + return Style; +} + FormatStyle getNoStyle() { FormatStyle NoStyle = getLLVMStyle(); NoStyle.DisableFormat = true; @@ -958,6 +985,8 @@ *Style = getWebKitStyle(); } else if (Name.equals_lower("gnu")) { *Style = getGNUStyle(); + } else if (Name.equals_lower("microsoft")) { + *Style = getMicrosoftStyle(Language); } else if (Name.equals_lower("none")) { *Style = getNoStyle(); } else { @@ -2276,6 +2305,7 @@ LangOpts.ObjC = 1; LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. LangOpts.DeclSpecKeyword = 1; // To get __declspec. + LangOpts.CSharp = Style.isCSharp(); return LangOpts; } @@ -2307,6 +2337,8 @@ return FormatStyle::LK_TextProto; if (FileName.endswith_lower(".td")) return FormatStyle::LK_TableGen; + if (FileName.endswith_lower(".cs")) + return FormatStyle::LK_CSharp; return FormatStyle::LK_Cpp; } Index: lib/Format/FormatToken.h =================================================================== --- lib/Format/FormatToken.h +++ lib/Format/FormatToken.h @@ -95,6 +95,8 @@ TYPE(TrailingReturnArrow) \ TYPE(TrailingUnaryOperator) \ TYPE(UnaryOperator) \ + TYPE(CSharpStringLiteral) \ + TYPE(CSharpNullCoalescing) \ TYPE(Unknown) enum TokenType { @@ -722,7 +724,35 @@ kw_slots = &IdentTable.get("slots"); kw_qslots = &IdentTable.get("Q_SLOTS"); - // Keep this at the end of the constructor to make sure everything here is + // C# keywords + kw_base = &IdentTable.get("base"); + kw_byte = &IdentTable.get("byte"); + kw_checked = &IdentTable.get("checked"); + kw_decimal = &IdentTable.get("decimal"); + kw_delegate = &IdentTable.get("delegate"); + kw_event = &IdentTable.get("event"); + kw_fixed = &IdentTable.get("fixed"); + kw_foreach = &IdentTable.get("foreach"); + kw_implicit = &IdentTable.get("implicit"); + kw_internal = &IdentTable.get("internal"); + kw_lock = &IdentTable.get("lock"); + kw_null = &IdentTable.get("null"); + kw_object = &IdentTable.get("object"); + kw_out = &IdentTable.get("out"); + kw_params = &IdentTable.get("params"); + kw_ref = &IdentTable.get("ref"); + kw_string = &IdentTable.get("string"); + kw_stackalloc = &IdentTable.get("stackalloc"); + kw_sbyte = &IdentTable.get("sbyte"); + kw_sealed = &IdentTable.get("sealed"); + kw_uint = &IdentTable.get("uint"); + kw_ulong = &IdentTable.get("ulong"); + kw_unchecked = &IdentTable.get("unchecked"); + kw_unsafe = &IdentTable.get("unsafe"); + kw_ushort = &IdentTable.get("ushort"); + + // Keep this at the end of the constructor to make sure everything here + // is // already initialized. JsExtraKeywords = std::unordered_set( {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, @@ -730,6 +760,19 @@ kw_set, kw_type, kw_typeof, kw_var, kw_yield, // Keywords from the Java section. kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); + + CSharpExtraKeywords = std::unordered_set( + {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event, + kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal, + kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params, + kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed, + kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, + // Keywords from the JavaScript section. + kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, + kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, + kw_set, kw_type, kw_typeof, kw_var, kw_yield, + // Keywords from the Java section. + kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); } // Context sensitive keywords. @@ -795,6 +838,36 @@ IdentifierInfo *kw_slots; IdentifierInfo *kw_qslots; + // C# keywords + IdentifierInfo *kw_base; + IdentifierInfo *kw_byte; + IdentifierInfo *kw_checked; + IdentifierInfo *kw_decimal; + IdentifierInfo *kw_delegate; + IdentifierInfo *kw_event; + IdentifierInfo *kw_fixed; + IdentifierInfo *kw_foreach; + IdentifierInfo *kw_implicit; + IdentifierInfo *kw_internal; + + IdentifierInfo *kw_lock; + IdentifierInfo *kw_null; + IdentifierInfo *kw_object; + IdentifierInfo *kw_out; + + IdentifierInfo *kw_params; + + IdentifierInfo *kw_ref; + IdentifierInfo *kw_string; + IdentifierInfo *kw_stackalloc; + IdentifierInfo *kw_sbyte; + IdentifierInfo *kw_sealed; + IdentifierInfo *kw_uint; + IdentifierInfo *kw_ulong; + IdentifierInfo *kw_unchecked; + IdentifierInfo *kw_unsafe; + IdentifierInfo *kw_ushort; + /// Returns \c true if \p Tok is a true JavaScript identifier, returns /// \c false if it is a keyword or a pseudo keyword. bool IsJavaScriptIdentifier(const FormatToken &Tok) const { @@ -803,9 +876,68 @@ JsExtraKeywords.end(); } + /// Returns \c true if \p Tok is a C# keyword, returns + /// \c false if it is a anything else. + bool isCSharpKeyword(const FormatToken &Tok) const { + switch (Tok.Tok.getKind()) { + case tok::kw_bool: + case tok::kw_break: + case tok::kw_case: + case tok::kw_catch: + case tok::kw_char: + case tok::kw_class: + case tok::kw_const: + case tok::kw_continue: + case tok::kw_default: + case tok::kw_do: + case tok::kw_double: + case tok::kw_else: + case tok::kw_enum: + case tok::kw_explicit: + case tok::kw_extern: + case tok::kw_false: + case tok::kw_float: + case tok::kw_for: + case tok::kw_goto: + case tok::kw_if: + case tok::kw_int: + case tok::kw_long: + case tok::kw_namespace: + case tok::kw_new: + case tok::kw_operator: + case tok::kw_private: + case tok::kw_protected: + case tok::kw_public: + case tok::kw_return: + case tok::kw_short: + case tok::kw_sizeof: + case tok::kw_static: + case tok::kw_struct: + case tok::kw_switch: + case tok::kw_this: + case tok::kw_throw: + case tok::kw_true: + case tok::kw_try: + case tok::kw_typeof: + case tok::kw_using: + case tok::kw_virtual: + case tok::kw_void: + case tok::kw_volatile: + case tok::kw_while: + return true; + default: + return Tok.is(tok::identifier) && + CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == + CSharpExtraKeywords.end(); + } + } + private: /// The JavaScript keywords beyond the C++ keyword set. std::unordered_set JsExtraKeywords; + + /// The C# keywords beyond the C++ keyword set + std::unordered_set CSharpExtraKeywords; }; } // namespace format Index: lib/Format/FormatTokenLexer.h =================================================================== --- lib/Format/FormatTokenLexer.h +++ lib/Format/FormatTokenLexer.h @@ -48,6 +48,10 @@ bool tryMergeLessLess(); bool tryMergeNSStringLiteral(); + bool tryMergeCSharpStringLiteral(); + bool tryMergeCSharpKeywordVariables(); + bool tryMergeCSharpNullConditionals(); + bool tryMergeCSharpDoubleQuestion(); bool tryMergeTokens(ArrayRef Kinds, TokenType NewType); Index: lib/Format/FormatTokenLexer.cpp =================================================================== --- lib/Format/FormatTokenLexer.cpp +++ lib/Format/FormatTokenLexer.cpp @@ -66,6 +66,21 @@ return; if (tryMergeLessLess()) return; + + if (Style.isCSharp()) { + if (tryMergeCSharpStringLiteral()) + return; + if (tryMergeCSharpKeywordVariables()) + return; + if (tryMergeCSharpDoubleQuestion()) + return; + if (tryMergeCSharpNullConditionals()) + return; + static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater}; + if (tryMergeTokens(JSRightArrow, TT_JsFatArrow)) + return; + } + if (tryMergeNSStringLiteral()) return; @@ -110,7 +125,8 @@ return false; auto &At = *(Tokens.end() - 2); auto &String = *(Tokens.end() - 1); - if (!At->is(tok::at) || !String->is(tok::string_literal)) + if (!At->is(tok::at) || + !String->isOneOf(tok::string_literal, tok::verbatim_string_literal)) return false; At->Tok.setKind(tok::string_literal); At->TokenText = StringRef(At->TokenText.begin(), @@ -121,6 +137,99 @@ return true; } +// Search for interpolated string literals $"aaaaa{abc}aaaaa" and mark token +// as TT_CSharpStringLiteral, and to prevent splitting of $ and ". +bool FormatTokenLexer::tryMergeCSharpStringLiteral() { + if (Tokens.size() < 2) + return false; + auto &At = *(Tokens.end() - 2); + auto &String = *(Tokens.end() - 1); + + // Look for $"aaaaaa" @"aaaaaa". + if (!(At->is(tok::at) || At->TokenText == "$") || + !String->isOneOf(tok::string_literal, tok::verbatim_string_literal)) + return false; + + // Look for $@"aaaaaa" + if (Tokens.size() >= 2 && At->is(tok::at)) { + auto &Dollar = *(Tokens.end() - 3); + if (Dollar->TokenText == "$") { + // this looks like $@"aaaaa" so we need to combine all 3 + Dollar->Tok.setKind(tok::string_literal); + Dollar->TokenText = + StringRef(Dollar->TokenText.begin(), + String->TokenText.end() - Dollar->TokenText.begin()); + Dollar->ColumnWidth += (At->ColumnWidth + String->ColumnWidth); + Dollar->Type = TT_CSharpStringLiteral; + Tokens.erase(Tokens.end() - 2); + return true; + } + } + + // Convert back into just a string_literal + At->Tok.setKind(tok::string_literal); + At->TokenText = StringRef(At->TokenText.begin(), + String->TokenText.end() - At->TokenText.begin()); + At->ColumnWidth += String->ColumnWidth; + At->Type = TT_CSharpStringLiteral; + Tokens.erase(Tokens.end() - 1); + return true; +} + +bool FormatTokenLexer::tryMergeCSharpDoubleQuestion() { + if (Tokens.size() < 2) + return false; + auto &FirstQuestion = *(Tokens.end() - 2); + auto &SecondQuestion = *(Tokens.end() - 1); + if (!FirstQuestion->is(tok::question) || !SecondQuestion->is(tok::question)) + return false; + FirstQuestion->Tok.setKind(tok::question); + FirstQuestion->TokenText = StringRef(FirstQuestion->TokenText.begin(), + SecondQuestion->TokenText.end() - + FirstQuestion->TokenText.begin()); + FirstQuestion->ColumnWidth += SecondQuestion->ColumnWidth; + FirstQuestion->Type = TT_CSharpNullCoalescing; + Tokens.erase(Tokens.end() - 1); + return true; +} + +bool FormatTokenLexer::tryMergeCSharpKeywordVariables() { + if (Tokens.size() < 2) + return false; + auto &At = *(Tokens.end() - 2); + auto &Keyword = *(Tokens.end() - 1); + if (!At->is(tok::at)) + return false; + if (!Keywords.isCSharpKeyword(*Keyword)) + return false; + + At->Tok.setKind(tok::identifier); + At->TokenText = StringRef(At->TokenText.begin(), + Keyword->TokenText.end() - At->TokenText.begin()); + At->ColumnWidth += Keyword->ColumnWidth; + At->Type = Keyword->Type; + Tokens.erase(Tokens.end() - 1); + return true; +} + +// in C# merge the Identifier and the ? together arg? +bool FormatTokenLexer::tryMergeCSharpNullConditionals() { + if (Tokens.size() < 2) + return false; + auto &Identifier = *(Tokens.end() - 2); + auto &Question = *(Tokens.end() - 1); + if (!Identifier->isOneOf(tok::r_square, tok::identifier) || + !Question->is(tok::question)) + return false; + Identifier->TokenText = + StringRef(Identifier->TokenText.begin(), + Question->TokenText.end() - Identifier->TokenText.begin()); + Identifier->ColumnWidth += Question->ColumnWidth; + Identifier->Type = Identifier->Type; + Tokens.erase(Tokens.end() - 1); + return true; +} + bool FormatTokenLexer::tryMergeLessLess() { // Merge X,less,less,Y into X,lessless,Y unless X or Y is less. if (Tokens.size() < 3) Index: lib/Format/TokenAnnotator.cpp =================================================================== --- lib/Format/TokenAnnotator.cpp +++ lib/Format/TokenAnnotator.cpp @@ -298,6 +298,8 @@ CurrentToken->Type = TT_JavaAnnotation; if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation)) CurrentToken->Type = TT_LeadingJavaAnnotation; + if (Left->Previous && Left->Previous->is(TT_AttributeSquare)) + CurrentToken->Type = TT_AttributeSquare; if (!HasMultipleLines) Left->PackingKind = PPK_Inconclusive; @@ -348,6 +350,40 @@ return false; } + bool isCSharpAttributeSpecifier(const FormatToken &Tok) { + if (!Style.isCSharp()) + return false; + + const FormatToken *AttrTok = Tok.Next; + if (!AttrTok) + return false; + + // Just an empty declaration e.g. string []. + if (AttrTok->is(tok::r_square)) + return false; + + // Move along the tokens inbetween the '[' and ']' e.g. [STAThread] + while (AttrTok && AttrTok->isNot(tok::r_square)) { + AttrTok = AttrTok->Next; + } + + if (!AttrTok) + return false; + + // move past the end of ']' + AttrTok = AttrTok->Next; + if (!AttrTok) + return false; + + // limit this to being an access modifier that follows + if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected, + tok::kw_class, tok::kw_static, tok::l_square, + Keywords.kw_internal)) { + return true; + } + return false; + } + bool isCpp11AttributeSpecifier(const FormatToken &Tok) { if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square)) return false; @@ -398,6 +434,11 @@ bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) || Contexts.back().InCpp11AttributeSpecifier; + // Treat C# Attributes [STAThread] much like C++ attributes [[...]]. + bool IsCSharp11AttributeSpecifier = + isCSharpAttributeSpecifier(*Left) || + Contexts.back().InCSharpAttributeSpecifier; + bool InsideInlineASM = Line.startsWith(tok::kw_asm); bool StartsObjCMethodExpr = !InsideInlineASM && !CppArrayTemplates && Style.isCpp() && @@ -475,6 +516,8 @@ // Should only be relevant to JavaScript: tok::kw_default)) { Left->Type = TT_ArrayInitializerLSquare; + } else if (IsCSharp11AttributeSpecifier) { + Left->Type = TT_AttributeSquare; } else { BindingIncrease = 10; Left->Type = TT_ArraySubscriptLSquare; @@ -489,11 +532,14 @@ Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr; Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier; + Contexts.back().InCSharpAttributeSpecifier = IsCSharp11AttributeSpecifier; while (CurrentToken) { if (CurrentToken->is(tok::r_square)) { if (IsCpp11AttributeSpecifier) CurrentToken->Type = TT_AttributeSquare; + if (IsCSharp11AttributeSpecifier) + CurrentToken->Type = TT_AttributeSquare; else if (((CurrentToken->Next && CurrentToken->Next->is(tok::l_paren)) || (CurrentToken->Previous && @@ -1190,6 +1236,7 @@ bool CaretFound = false; bool IsForEachMacro = false; bool InCpp11AttributeSpecifier = false; + bool InCSharpAttributeSpecifier = false; }; /// Puts a new \c Context onto the stack \c Contexts for the lifetime @@ -2624,7 +2671,7 @@ // and "%d %d" if (Left.is(tok::numeric_constant) && Right.is(tok::percent)) return Right.WhitespaceRange.getEnd() != Right.WhitespaceRange.getBegin(); - } else if (Style.Language == FormatStyle::LK_JavaScript) { + } else if (Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) { if (Left.is(TT_JsFatArrow)) return true; // for await ( ... @@ -2971,6 +3018,14 @@ if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine) return true; + // Put multiple C# attributes on a new line. + if (Style.isCSharp() && + ((Left.is(TT_AttributeSquare) && Left.is(tok::r_square)) || + (Left.is(tok::r_square) && Right.is(TT_AttributeSquare) && + Right.is(tok::l_square)))) + return true; + + // Put multiple Java annotation on a new line. if ((Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript) && Left.is(TT_LeadingJavaAnnotation) && Index: lib/Format/UnwrappedLineFormatter.cpp =================================================================== --- lib/Format/UnwrappedLineFormatter.cpp +++ lib/Format/UnwrappedLineFormatter.cpp @@ -94,7 +94,7 @@ /// characters to the left from their level. int getIndentOffset(const FormatToken &RootToken) { if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) + Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) return 0; if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier() || @@ -1071,7 +1071,9 @@ TheLine.Last->TotalLength + Indent <= ColumnLimit || (TheLine.Type == LT_ImportStatement && (Style.Language != FormatStyle::LK_JavaScript || - !Style.JavaScriptWrapImports)); + !Style.JavaScriptWrapImports)) || + (Style.isCSharp() && + TheLine.InPPDirective); // don't split #regions in C# if (Style.ColumnLimit == 0) NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) .formatLine(TheLine, NextStartColumn + Indent, Index: lib/Format/UnwrappedLineParser.cpp =================================================================== --- lib/Format/UnwrappedLineParser.cpp +++ lib/Format/UnwrappedLineParser.cpp @@ -999,7 +999,7 @@ case tok::kw_protected: case tok::kw_private: if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) + Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) nextToken(); else parseAccessSpecifier(); @@ -1213,9 +1213,9 @@ // parseRecord falls through and does not yet add an unwrapped line as a // record declaration or definition can start a structural element. parseRecord(); - // This does not apply for Java and JavaScript. + // This does not apply for Java, JavaScript and C#. if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) { + Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) { if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); Index: lib/Lex/Lexer.cpp =================================================================== --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -1908,10 +1908,25 @@ : diag::warn_c99_compat_unicode_literal); char C = getAndAdvanceChar(CurPtr, Result); - while (C != '"') { + while (true) { + // in a verbatium string you double quotes are doubly + // escaped, when you see a " check the next character + // and break the loop only if its not a another quote + if (C == '"' && Kind == tok::verbatim_string_literal) { + // peek the next character + unsigned Size = 0; + char NextChar = getCharAndSize(CurPtr, Size); + if (NextChar != '"') { + break; + } + // it was "" so consume the second quote + C = getAndAdvanceChar(CurPtr, Result); + } else if (C == '"') { + break; + } // Skip escaped characters. Escaped newlines will already be processed by // getAndAdvanceChar. - if (C == '\\') + if (C == '\\' && Kind != tok::verbatim_string_literal) C = getAndAdvanceChar(CurPtr, Result); if (C == '\n' || C == '\r' || // Newline. @@ -3841,9 +3856,16 @@ } break; - case '@': - // Objective C support. - if (CurPtr[-1] == '@' && LangOpts.ObjC) + case '@': // Objective C support or verbatim string literal (C#) + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + Char = getCharAndSize(CurPtr, SizeTmp); + + // verbatim string literal (C#) . + if (Char == '"' && LangOpts.CSharp) + return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), + tok::verbatim_string_literal); + else if (CurPtr[-1] == '@' && LangOpts.ObjC) Kind = tok::at; else Kind = tok::unknown; Index: lib/Lex/LiteralSupport.cpp =================================================================== --- lib/Lex/LiteralSupport.cpp +++ lib/Lex/LiteralSupport.cpp @@ -42,6 +42,7 @@ case tok::string_literal: case tok::utf8_char_constant: case tok::utf8_string_literal: + case tok::verbatim_string_literal: return Target.getCharWidth(); case tok::wide_char_constant: case tok::wide_string_literal: Index: lib/Lex/TokenConcatenation.cpp =================================================================== --- lib/Lex/TokenConcatenation.cpp +++ lib/Lex/TokenConcatenation.cpp @@ -214,6 +214,7 @@ llvm_unreachable("tok::raw_identifier in non-raw lexing mode!"); case tok::string_literal: + case tok::verbatim_string_literal: case tok::wide_string_literal: case tok::utf8_string_literal: case tok::utf16_string_literal: @@ -242,10 +243,11 @@ return GetFirstChar(PP, Tok) != '.'; if (Tok.getIdentifierInfo() || - Tok.isOneOf(tok::wide_string_literal, tok::utf8_string_literal, - tok::utf16_string_literal, tok::utf32_string_literal, - tok::wide_char_constant, tok::utf8_char_constant, - tok::utf16_char_constant, tok::utf32_char_constant)) + Tok.isOneOf(tok::wide_string_literal, tok::verbatim_string_literal, + tok::utf8_string_literal, tok::utf16_string_literal, + tok::utf32_string_literal, tok::wide_char_constant, + tok::utf8_char_constant, tok::utf16_char_constant, + tok::utf32_char_constant)) return true; // If this isn't identifier + string, we're done. Index: lib/Rewrite/HTMLRewrite.cpp =================================================================== --- lib/Rewrite/HTMLRewrite.cpp +++ lib/Rewrite/HTMLRewrite.cpp @@ -470,10 +470,11 @@ --TokLen; // FALL THROUGH to chop the 8 LLVM_FALLTHROUGH; + case tok::verbatim_string_literal: case tok::wide_string_literal: case tok::utf16_string_literal: case tok::utf32_string_literal: - // Chop off the L, u, U or 8 prefix + // Chop off the @ L, u, U or 8 prefix ++TokOffs; --TokLen; LLVM_FALLTHROUGH; Index: tools/clang-format/ClangFormat.cpp =================================================================== --- tools/clang-format/ClangFormat.cpp +++ tools/clang-format/ClangFormat.cpp @@ -345,7 +345,7 @@ cl::SetVersionPrinter(PrintVersion); cl::ParseCommandLineOptions( argc, argv, - "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf code.\n\n" + "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n" "If no arguments are specified, it formats the code from standard input\n" "and writes the result to the standard output.\n" "If s are given, it reformats the files. If -i is specified\n" Index: unittests/Format/CMakeLists.txt =================================================================== --- unittests/Format/CMakeLists.txt +++ unittests/Format/CMakeLists.txt @@ -6,6 +6,7 @@ CleanupTest.cpp FormatTest.cpp FormatTestComments.cpp + FormatTestCSharp.cpp FormatTestJS.cpp FormatTestJava.cpp FormatTestObjC.cpp Index: unittests/Format/FormatTestCSharp.cpp =================================================================== --- /dev/null +++ unittests/Format/FormatTestCSharp.cpp @@ -0,0 +1,180 @@ +//===- unittest/Format/FormatTestCSharp.cpp - Formatting tests for CSharp -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FormatTestUtils.h" +#include "clang/Format/Format.h" +#include "llvm/Support/Debug.h" +#include "gtest/gtest.h" + +#define DEBUG_TYPE "format-test" + +namespace clang { +namespace format { + +class FormatTestCSharp : public ::testing::Test { +protected: + static std::string format(llvm::StringRef Code, unsigned Offset, + unsigned Length, const FormatStyle &Style) { + LLVM_DEBUG(llvm::errs() << "---\n"); + LLVM_DEBUG(llvm::errs() << Code << "\n\n"); + std::vector Ranges(1, tooling::Range(Offset, Length)); + tooling::Replacements Replaces = reformat(Style, Code, Ranges); + auto Result = applyAllReplacements(Code, Replaces); + EXPECT_TRUE(static_cast(Result)); + LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n"); + return *Result; + } + + static std::string + format(llvm::StringRef Code, + const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_CSharp)) { + return format(Code, 0, Code.size(), Style); + } + + static FormatStyle getStyleWithColumns(unsigned ColumnLimit) { + FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp); + Style.ColumnLimit = ColumnLimit; + return Style; + } + + static void verifyFormat( + llvm::StringRef Code, + const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_CSharp)) { + EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable"; + EXPECT_EQ(Code.str(), format(test::messUp(Code), Style)); + } +}; + +TEST_F(FormatTestCSharp, CSharpClass) { + verifyFormat("public class SomeClass {\n" + " void f() {}\n" + " int g() { return 0; }\n" + " void h() {\n" + " while (true) f();\n" + " for (;;) f();\n" + " if (true) f();\n" + " }\n" + "}"); +} + +TEST_F(FormatTestCSharp, AccessModifiers) { + verifyFormat("public String toString() {}"); + verifyFormat("private String toString() {}"); + verifyFormat("protected String toString() {}"); + verifyFormat("internal String toString() {}"); + + verifyFormat("public override String toString() {}"); + verifyFormat("private override String toString() {}"); + verifyFormat("protected override String toString() {}"); + verifyFormat("internal override String toString() {}"); + + verifyFormat("internal static String toString() {}"); +} + +TEST_F(FormatTestCSharp, NoStringLiteralBreaks) { + verifyFormat("foo(" + "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaa\");"); +} + +TEST_F(FormatTestCSharp, CSharpVerbatiumStringLiterals) { + verifyFormat("foo(@\"aaaaaaaa\\abc\\aaaa\");"); + // @"ABC\" + ToString("B") - handle embedded \ in literal string at + // the end + verifyFormat("string s = @\"ABC\\\" + ToString(\"B\");"); + + verifyFormat("string s = @\"ABC\"\"DEF\"\"GHI\""); + verifyFormat("string s = @\"ABC\"\"DEF\"\"\""); + verifyFormat("string s = @\"ABC\"\"DEF\"\"\" + abc"); +} + +TEST_F(FormatTestCSharp, CSharpInterpolatedStringLiterals) { + verifyFormat("foo($\"aaaaaaaa{aaa}aaaa\");"); + verifyFormat("foo($\"aaaa{A}\");"); + verifyFormat( + "foo($\"aaaa{A}" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\");"); + verifyFormat("Name = $\"{firstName} {lastName}\";"); + + // $"ABC\" + ToString("B") - handle embedded \ in literal string at + // the end + verifyFormat("string s = $\"A{abc}BC\" + ToString(\"B\");"); + verifyFormat("$\"{domain}\\\\{user}\""); + verifyFormat( + "var verbatimInterpolated = $@\"C:\\Users\\{userName}\\Documents\\\";"); +} + +TEST_F(FormatTestCSharp, CSharpFatArrows) { + verifyFormat("Task serverTask = Task.Run(async() => {"); + verifyFormat("public override string ToString() => \"{Name}\\{Age}\";"); +} + +TEST_F(FormatTestCSharp, CSharpNullConditional) { + verifyFormat( + "public Person(string firstName, string lastName, int? age=null)"); + + verifyFormat("switch(args?.Length)"); + + verifyFormat("public static void Main(string[] args) { string dirPath " + "= args?[0]; }"); +} + +TEST_F(FormatTestCSharp, Attributes) { + verifyFormat("[STAThread]\n" + "static void\n" + "Main(string[] args) {}"); + + verifyFormat("[TestMethod]\n" + "private class Test {}"); + + verifyFormat("[TestMethod]\n" + "protected class Test {}"); + + verifyFormat("[TestMethod]\n" + "internal class Test {}"); + + verifyFormat("[TestMethod]\n" + "class Test {}"); + + verifyFormat("[TestMethod]\n" + "[DeploymentItem(\"Test.txt\")]\n" + "public class Test {}"); + + verifyFormat("[System.AttributeUsage(System.AttributeTargets.Method)]\n" + "[System.Runtime.InteropServices.ComVisible(true)]\n" + "public sealed class STAThreadAttribute : Attribute {}"); + + verifyFormat("[Verb(\"start\", HelpText = \"Starts the server listening on " + "provided port\")]\n" + "class Test {}"); + + verifyFormat("[TestMethod]\n" + "public string Host {\n set;\n get;\n}"); + + verifyFormat("[TestMethod(\"start\", HelpText = \"Starts the server " + "listening on provided host\")]\n" + "public string Host {\n set;\n get;\n}"); +} + +TEST_F(FormatTestCSharp, CSharpRegions) { + verifyFormat("#region aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaa " + "aaaaaaaaaaaaaaa long region"); +} + +TEST_F(FormatTestCSharp, CSharpKeyWordEscaping) { + verifyFormat("public enum var { none, @string, bool, @enum }"); +} + +TEST_F(FormatTestCSharp, CSharpNullCoalescing) { + verifyFormat("var test = ABC ?? DEF"); + verifyFormat("string myname = name ?? \"ABC\";"); + verifyFormat("return _name ?? \"DEF\";"); +} + +} // namespace format +} // end namespace clang