diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -2238,6 +2238,16 @@ firstValue : SecondValueVeryVeryVeryVeryLong; +**BreakBetweenInstancePorts** (``Boolean``) :versionbadge:`clang-format 15` + For Verilog, put each port on its own line in module instantiations. + + .. code-block:: c++ + + ffnand ff1(.q(), + .qbar(out1), + .clear(in1), + .preset(in2)); + **BreakConstructorInitializers** (``BreakConstructorInitializersStyle``) :versionbadge:`clang-format 5` The break constructor initializers style to use. @@ -3193,6 +3203,9 @@ Should be used for Protocol Buffer messages in text format (https://developers.google.com/protocol-buffers/). + * ``LK_Verilog`` (in configuration: ``Verilog``) + Should be used for Verilog / SystemVerilog. + **MacroBlockBegin** (``String``) :versionbadge:`clang-format 3.7` diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -1676,6 +1676,16 @@ /// \version 3.7 bool BreakBeforeTernaryOperators; + /// For Verilog, put each port on its own line in module instantiations. + /// \code + /// ffnand ff1(.q(), + /// .qbar(out1), + /// .clear(in1), + /// .preset(in2)); + /// \endcode + /// \version 15 + bool BreakBetweenInstancePorts; + /// Different ways to break initializers. enum BreakConstructorInitializersStyle : unsigned char { /// Break constructor initializers before the colon and after the commas. @@ -2586,7 +2596,9 @@ LK_TableGen, /// Should be used for Protocol Buffer messages in text format /// (https://developers.google.com/protocol-buffers/). - LK_TextProto + LK_TextProto, + /// Should be used for Verilog / SystemVerilog. 
+ LK_Verilog }; bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; } bool isCSharp() const { return Language == LK_CSharp; } @@ -3874,6 +3886,7 @@ BreakBeforeBraces == R.BreakBeforeBraces && BreakBeforeConceptDeclarations == R.BreakBeforeConceptDeclarations && BreakBeforeTernaryOperators == R.BreakBeforeTernaryOperators && + BreakBetweenInstancePorts == R.BreakBetweenInstancePorts && BreakConstructorInitializers == R.BreakConstructorInitializers && CompactNamespaces == R.CompactNamespaces && BreakAfterJavaFieldAnnotations == R.BreakAfterJavaFieldAnnotations && @@ -4213,6 +4226,9 @@ /// of ``getStyle()``. extern const char *StyleOptionHelpDescription; +/// Returns the description for assume-filename. +StringRef getAssumeFilenameHelp(); + /// The suggested format style to use by default. This allows tools using /// `getStyle` to have a consistent default style. /// Different builds can modify the value to the preferred styles. @@ -4282,6 +4298,8 @@ return "TableGen"; case FormatStyle::LK_TextProto: return "TextProto"; + case FormatStyle::LK_Verilog: + return "Verilog"; default: return "Unknown"; } diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -626,7 +626,7 @@ // Indent preprocessor directives after the hash if required. int PPColumnCorrection = 0; if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash && - Previous.is(tok::hash) && State.FirstIndent > 0 && + Keywords.isPPHash(Previous, Style) && State.FirstIndent > 0 && (State.Line->Type == LT_PreprocessorDirective || State.Line->Type == LT_ImportStatement)) { Spaces += State.FirstIndent; @@ -1035,8 +1035,12 @@ return std::max(CurrentState.LastSpace, CurrentState.Indent + Style.ContinuationIndentWidth); - if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths && - State.Line->First->is(tok::kw_enum)) + // After a goto label. 
Usually labels are on separate lines. However + // for Verilog the labels may be only recognized by the annotator and + // thus are on the same line as the current token. + if (Keywords.isEndOfLabel(Previous, Style) || + (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths && + State.Line->First->is(tok::kw_enum))) return (Style.IndentWidth * State.Line->First->IndentLevel) + Style.IndentWidth; @@ -1365,8 +1369,8 @@ State.StartOfStringLiteral = State.Column + 1; else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) State.StartOfStringLiteral = State.Column; - else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && - !Current.isStringLiteral()) + else if (!Current.isOneOf(tok::comment, tok::identifier) && + !Keywords.isPPHash(Current, Style) && !Current.isStringLiteral()) State.StartOfStringLiteral = 0; State.Column += Current.ColumnWidth; diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -68,6 +68,7 @@ IO.enumCase(Value, "TextProto", FormatStyle::LK_TextProto); IO.enumCase(Value, "CSharp", FormatStyle::LK_CSharp); IO.enumCase(Value, "Json", FormatStyle::LK_Json); + IO.enumCase(Value, "Verilog", FormatStyle::LK_Verilog); } }; @@ -693,9 +694,11 @@ IO.mapOptional("BraceWrapping", Style.BraceWrapping); IO.mapOptional("BreakBeforeBinaryOperators", Style.BreakBeforeBinaryOperators); + IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); IO.mapOptional("BreakBeforeConceptDeclarations", Style.BreakBeforeConceptDeclarations); - IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); + IO.mapOptional("BreakBetweenInstancePorts", + Style.BreakBetweenInstancePorts); bool BreakBeforeInheritanceComma = false; IO.mapOptional("BreakBeforeInheritanceComma", BreakBeforeInheritanceComma); @@ -1204,9 +1207,10 @@ LLVMStyle.BinPackArguments = true; LLVMStyle.BinPackParameters = true; LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; + 
LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; LLVMStyle.BreakBeforeConceptDeclarations = FormatStyle::BBCDS_Always; LLVMStyle.BreakBeforeTernaryOperators = true; - LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; + LLVMStyle.BreakBetweenInstancePorts = true; LLVMStyle.BraceWrapping = {/*AfterCaseLabel=*/false, /*AfterClass=*/false, /*AfterControlStatement=*/FormatStyle::BWACS_Never, @@ -1258,7 +1262,8 @@ LLVMStyle.IncludeStyle.IncludeIsMainRegex = "(Test)?$"; LLVMStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve; LLVMStyle.IndentAccessModifiers = false; - LLVMStyle.IndentCaseLabels = false; + LLVMStyle.IndentCaseLabels = + Language == FormatStyle::LK_Verilog ? true : false; LLVMStyle.IndentCaseBlocks = false; LLVMStyle.IndentGotoLabels = true; LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None; @@ -3400,29 +3405,51 @@ "parameters, e.g.:\n" " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; +static const ArrayRef< + std::pair>> +LanguageSuffixes() { + static const std::vector< + std::pair>> + List = {{FormatStyle::LK_CSharp, {".cs"}}, + {FormatStyle::LK_Java, {".java"}}, + {FormatStyle::LK_JavaScript, + {".mjs", ".js", ".ts"}}, // (module) JavaScript or TypeScript. 
+ {FormatStyle::LK_Json, {".json"}}, + {FormatStyle::LK_ObjC, {".m", ".mm"}}, + {FormatStyle::LK_Proto, {".proto", ".protodevel"}}, + {FormatStyle::LK_TableGen, {".td"}}, + {FormatStyle::LK_TextProto, + {".textpb", ".pb.txt", ".textproto", ".asciipb"}}, + {FormatStyle::LK_Verilog, {".sv", ".svh", ".v", ".vh"}}}; + return List; +} + +StringRef getAssumeFilenameHelp() { + static std::string text; + + text = "Override filename used to determine the language.\n" + "When reading from stdin, clang-format assumes this\n" + "filename to determine the language.\n" + "Unrecognized filenames are treated as C++.\n" + "supported:\n"; + for (auto &Lang : LanguageSuffixes()) { + text += " "; + text += getLanguageName(Lang.first); + text += ":"; + for (auto Suffix : Lang.second) { + text += " "; + text += Suffix; + } + text += "\n"; + } + return text; +} + static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { - if (FileName.endswith(".java")) - return FormatStyle::LK_Java; - if (FileName.endswith_insensitive(".js") || - FileName.endswith_insensitive(".mjs") || - FileName.endswith_insensitive(".ts")) - return FormatStyle::LK_JavaScript; // (module) JavaScript or TypeScript. 
- if (FileName.endswith(".m") || FileName.endswith(".mm")) - return FormatStyle::LK_ObjC; - if (FileName.endswith_insensitive(".proto") || - FileName.endswith_insensitive(".protodevel")) - return FormatStyle::LK_Proto; - if (FileName.endswith_insensitive(".textpb") || - FileName.endswith_insensitive(".pb.txt") || - FileName.endswith_insensitive(".textproto") || - FileName.endswith_insensitive(".asciipb")) - return FormatStyle::LK_TextProto; - if (FileName.endswith_insensitive(".td")) - return FormatStyle::LK_TableGen; - if (FileName.endswith_insensitive(".cs")) - return FormatStyle::LK_CSharp; - if (FileName.endswith_insensitive(".json")) - return FormatStyle::LK_Json; + for (auto &Lang : LanguageSuffixes()) + for (auto Suffix : Lang.second) + if (FileName.endswith(Suffix)) + return Lang.first; return FormatStyle::LK_Cpp; } diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -59,6 +59,7 @@ TYPE(FunctionLBrace) \ TYPE(FunctionLikeOrFreestandingMacro) \ TYPE(FunctionTypeLParen) \ + TYPE(GotoLabelColon) \ TYPE(IfMacro) \ TYPE(ImplicitStringLiteral) \ TYPE(InheritanceColon) \ @@ -76,7 +77,8 @@ TYPE(JsTypeOperator) \ TYPE(JsTypeOptionalQuestion) \ TYPE(JsAndAndEqual) \ - TYPE(LambdaArrow) \ + TYPE(LambdaArrow) /* Java lambda arrow, abused to mark the Verilog \ + implication operator */ \ TYPE(LambdaLBrace) \ TYPE(LambdaLSquare) \ TYPE(LeadingJavaAnnotation) \ @@ -110,7 +112,8 @@ TYPE(RequiresExpressionLBrace) \ TYPE(RequiresExpressionLParen) \ TYPE(SelectorName) \ - TYPE(StartOfName) \ + TYPE(StartOfName) /* the first name in a list of declarations, like `a` in \ + `int a, b;` */ \ TYPE(StatementAttributeLikeMacro) \ TYPE(StatementMacro) \ TYPE(StructLBrace) \ @@ -134,6 +137,20 @@ TYPE(CSharpGenericTypeConstraint) \ TYPE(CSharpGenericTypeConstraintColon) \ TYPE(CSharpGenericTypeConstraintComma) \ + TYPE(VerilogAssignComma) /* those that separate assignments in an assign 
\ + statement */ \ + TYPE(VerilogBlockEvent) /* begin and end in a block event expression in a \ + coverage event, section A.2.11 */ \ + TYPE(VerilogBlockLabelColon) /* like in begin : block */ \ + TYPE(VerilogDimensionedTypeName) \ + TYPE(VerilogInstancePortLParen) /* list of port connections or parameters in \ + a module instantiation */ \ + TYPE(VerilogInstancePortComma) \ + TYPE(VerilogNumberBase) /* for the base in a number literal, not including \ + the quote */ \ + TYPE(VerilogStrength) /* like `(strong1, pull0)` */ \ + TYPE(VerilogTableItem) \ + TYPE(VerilogTypeComma) /* those that separate ports of different types */ \ TYPE(Unknown) /// Determines the semantic type of a syntactic token, e.g. whether "<" is a @@ -367,6 +384,9 @@ } bool isTypeFinalized() const { return TypeIsFinalized; } + /// Used to set an operator precedence explicitly. + prec::Level ForcedPrecedence = prec::Unknown; + /// The number of newlines immediately before the \c Token. /// /// This can be used to determine what the user wrote in the original code @@ -699,6 +719,8 @@ } prec::Level getPrecedence() const { + if (ForcedPrecedence != prec::Unknown) + return ForcedPrecedence; return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, /*CPlusPlus11=*/true); } @@ -712,8 +734,8 @@ } /// Returns the next token ignoring comments. - LLVM_NODISCARD const FormatToken *getNextNonComment() const { - const FormatToken *Tok = Next; + LLVM_NODISCARD FormatToken *getNextNonComment() const { + FormatToken *Tok = Next; while (Tok && Tok->is(tok::comment)) Tok = Tok->Next; return Tok; @@ -895,8 +917,11 @@ /// properly supported by Clang's lexer. 
struct AdditionalKeywords { #define LIST_ADDITIONAL_KEYWORDS \ + /* Preprocessor */ \ + KEYWORD(__FILE__, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(__LINE__, ATTR_VERILOG_PP_DIRECTIVE) \ /* Context-sensitive */ \ - KEYWORD(final, 0) \ + KEYWORD(final, ATTR_VERILOG_KEYWORD) \ KEYWORD(override, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ KEYWORD(in, ATTR_CSHARP_KEYWORD) \ KEYWORD(of, 0) \ @@ -913,30 +938,37 @@ KEYWORD(declare, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ KEYWORD(finally, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ KEYWORD(from, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ - KEYWORD(function, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(function, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD | \ + ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ KEYWORD(get, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ - KEYWORD(import, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(import, \ + ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD | ATTR_VERILOG_KEYWORD) \ KEYWORD(infer, 0) \ KEYWORD(is, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD | ATTR_CSHARP_KEYWORD) \ - KEYWORD(let, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ - KEYWORD(module, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(let, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD | ATTR_VERILOG_KEYWORD | \ + ATTR_VERILOG_QUALIFIER) \ + KEYWORD(module, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD | ATTR_VERILOG_HIER) \ KEYWORD(readonly, \ ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD | ATTR_CSHARP_KEYWORD) \ KEYWORD(set, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ KEYWORD(type, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ KEYWORD(typeof, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ - KEYWORD(var, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(var, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD | ATTR_VERILOG_KEYWORD | \ + ATTR_VERILOG_QUALIFIER) \ KEYWORD(yield, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ /* Java */ \ KEYWORD(abstract, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ - KEYWORD(assert, 0) \ - KEYWORD(extends, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ - KEYWORD(implements, 
ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(assert, ATTR_VERILOG_KEYWORD) \ + KEYWORD(extends, \ + ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD | ATTR_VERILOG_KEYWORD) \ + KEYWORD(implements, \ + ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD | ATTR_VERILOG_KEYWORD) \ KEYWORD(instanceof, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD) \ - KEYWORD(interface, \ - ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD | ATTR_CSHARP_KEYWORD) \ + KEYWORD(interface, ATTR_JS_KEYWORD | ATTR_CSHARP_KEYWORD | \ + ATTR_CSHARP_KEYWORD | ATTR_VERILOG_KEYWORD | \ + ATTR_VERILOG_HIER) \ KEYWORD(native, 0) \ - KEYWORD(package, 0) \ + KEYWORD(package, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ KEYWORD(synchronized, 0) \ KEYWORD(throws, 0) \ KEYWORD(__except, 0) \ @@ -965,7 +997,7 @@ KEYWORD(delegate, ATTR_CSHARP_KEYWORD) \ KEYWORD(event, ATTR_CSHARP_KEYWORD) \ KEYWORD(fixed, ATTR_CSHARP_KEYWORD) \ - KEYWORD(foreach, ATTR_CSHARP_KEYWORD) \ + KEYWORD(foreach, ATTR_CSHARP_KEYWORD | ATTR_VERILOG_KEYWORD) \ KEYWORD(implicit, ATTR_CSHARP_KEYWORD) \ KEYWORD(init, ATTR_CSHARP_KEYWORD) \ KEYWORD(internal, ATTR_CSHARP_KEYWORD) \ @@ -974,7 +1006,8 @@ KEYWORD(object, ATTR_CSHARP_KEYWORD) \ KEYWORD(out, ATTR_CSHARP_KEYWORD) \ KEYWORD(params, ATTR_CSHARP_KEYWORD) \ - KEYWORD(ref, ATTR_CSHARP_KEYWORD) \ + KEYWORD(ref, \ + ATTR_CSHARP_KEYWORD | ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ KEYWORD(string, ATTR_CSHARP_KEYWORD) \ KEYWORD(stackalloc, ATTR_CSHARP_KEYWORD) \ KEYWORD(sbyte, ATTR_CSHARP_KEYWORD) \ @@ -985,13 +1018,148 @@ KEYWORD(unsafe, ATTR_CSHARP_KEYWORD) \ KEYWORD(ushort, ATTR_CSHARP_KEYWORD) \ KEYWORD(when, ATTR_CSHARP_KEYWORD) \ - KEYWORD(where, ATTR_CSHARP_KEYWORD) + KEYWORD(where, ATTR_CSHARP_KEYWORD) \ + /* Verilog */ \ + KEYWORD(always, ATTR_VERILOG_KEYWORD) \ + KEYWORD(always_comb, ATTR_VERILOG_KEYWORD) \ + KEYWORD(always_ff, ATTR_VERILOG_KEYWORD) \ + KEYWORD(always_latch, ATTR_VERILOG_KEYWORD) \ + /* Verilog doesn't have all of C++'s operator keywords. And those that it \ + * has have different precedence. 
So they are defined here. */ \ + KEYWORD(and, ATTR_VERILOG_KEYWORD) \ + KEYWORD(assign, ATTR_VERILOG_KEYWORD) \ + KEYWORD(assume, ATTR_VERILOG_KEYWORD) \ + KEYWORD(automatic, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(before, ATTR_VERILOG_KEYWORD) \ + KEYWORD(begin, ATTR_VERILOG_KEYWORD) \ + KEYWORD(begin_keywords, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(bins, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(binsof, ATTR_VERILOG_KEYWORD) \ + KEYWORD(casex, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ + KEYWORD(casez, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ + KEYWORD(celldefine, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(checker, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ + KEYWORD(clocking, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ + KEYWORD(constraint, ATTR_VERILOG_KEYWORD) \ + KEYWORD(cover, ATTR_VERILOG_KEYWORD) \ + KEYWORD(covergroup, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ + KEYWORD(coverpoint, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(default_decay_time, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(default_nettype, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(default_trireg_strength, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(delay_mode_distributed, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(delay_mode_path, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(delay_mode_unit, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(delay_mode_zero, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(disable, ATTR_VERILOG_KEYWORD) \ + KEYWORD(dist, ATTR_VERILOG_KEYWORD) \ + KEYWORD(elsif, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(end, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(end_keywords, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(endcase, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endcelldefine, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(endclass, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endclocking, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endchecker, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endfunction, 
ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endgenerate, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endgroup, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endinterface, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endmodule, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endpackage, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endprimitive, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endprogram, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endproperty, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endsequence, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endspecify, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endtable, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(endtask, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(forever, ATTR_VERILOG_KEYWORD) \ + KEYWORD(fork, ATTR_VERILOG_KEYWORD) \ + KEYWORD(generate, ATTR_VERILOG_KEYWORD) \ + KEYWORD(highz0, ATTR_VERILOG_KEYWORD) \ + KEYWORD(highz1, ATTR_VERILOG_KEYWORD) \ + KEYWORD(iff, ATTR_VERILOG_KEYWORD) \ + KEYWORD(ifnone, ATTR_VERILOG_KEYWORD) \ + KEYWORD(ignore_bins, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(illegal_bins, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(initial, ATTR_VERILOG_KEYWORD) \ + KEYWORD(inout, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(input, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(inside, ATTR_VERILOG_KEYWORD) \ + KEYWORD(interconnect, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(intersect, ATTR_VERILOG_KEYWORD) \ + KEYWORD(join, ATTR_VERILOG_KEYWORD) \ + KEYWORD(join_any, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(join_none, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_END) \ + KEYWORD(large, ATTR_VERILOG_KEYWORD) \ + KEYWORD(local, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(localparam, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(macromodule, ATTR_VERILOG_KEYWORD | 
ATTR_VERILOG_HIER) \ + KEYWORD(matches, ATTR_VERILOG_KEYWORD) \ + KEYWORD(medium, ATTR_VERILOG_KEYWORD) \ + KEYWORD(nounconnected_drive, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(or, ATTR_VERILOG_KEYWORD) \ + KEYWORD(output, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(packed, ATTR_VERILOG_KEYWORD) \ + KEYWORD(parameter, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(primitive, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ + KEYWORD(priority, ATTR_VERILOG_KEYWORD) \ + KEYWORD(program, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ + KEYWORD(property, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ + KEYWORD(pull0, ATTR_VERILOG_KEYWORD) \ + KEYWORD(pull1, ATTR_VERILOG_KEYWORD) \ + KEYWORD(pure, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(rand, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(randc, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(randcase, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ + KEYWORD(randsequence, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ + KEYWORD(repeat, ATTR_VERILOG_KEYWORD) \ + KEYWORD(resetall, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(sample, ATTR_VERILOG_KEYWORD) \ + KEYWORD(scalared, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(sequence, ATTR_VERILOG_KEYWORD) \ + KEYWORD(small, ATTR_VERILOG_KEYWORD) \ + KEYWORD(soft, ATTR_VERILOG_KEYWORD) \ + KEYWORD(solve, ATTR_VERILOG_KEYWORD) \ + KEYWORD(specify, ATTR_VERILOG_KEYWORD) \ + KEYWORD(specparam, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(strong0, ATTR_VERILOG_KEYWORD) \ + KEYWORD(strong1, ATTR_VERILOG_KEYWORD) \ + KEYWORD(supply0, ATTR_VERILOG_KEYWORD) \ + KEYWORD(supply1, ATTR_VERILOG_KEYWORD) \ + KEYWORD(table, ATTR_VERILOG_KEYWORD) \ + KEYWORD(tagged, ATTR_VERILOG_KEYWORD) \ + KEYWORD(task, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_HIER) \ + KEYWORD(timescale, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(tri, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(tri0, ATTR_VERILOG_KEYWORD | 
ATTR_VERILOG_QUALIFIER) \ + KEYWORD(tri1, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(triand, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(trior, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(trireg, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(unconnected_drive, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(undefineall, ATTR_VERILOG_PP_DIRECTIVE) \ + KEYWORD(unique, ATTR_VERILOG_KEYWORD) \ + KEYWORD(unique0, ATTR_VERILOG_KEYWORD) \ + KEYWORD(uwire, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(vectored, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(wand, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(weak0, ATTR_VERILOG_KEYWORD) \ + KEYWORD(weak1, ATTR_VERILOG_KEYWORD) \ + KEYWORD(wildcard, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(wire, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) \ + KEYWORD(with, ATTR_VERILOG_KEYWORD) \ + KEYWORD(wor, ATTR_VERILOG_KEYWORD | ATTR_VERILOG_QUALIFIER) AdditionalKeywords(IdentifierTable &IdentTable) { #define KEYWORD(WORD, ATTRS) kw_##WORD = &IdentTable.get(#WORD); LIST_ADDITIONAL_KEYWORDS #undef KEYWORD + backtick = &IdentTable.get("`"); + backtickbacktick = &IdentTable.get("``"); + quote = &IdentTable.get("\'"); kw_internal_ident_after_define = &IdentTable.get("__CLANG_FORMAT_INTERNAL_IDENT_AFTER_DEFINE__"); @@ -1005,6 +1173,11 @@ enum { ATTR_JS_KEYWORD = 0x1, ATTR_CSHARP_KEYWORD = 0x2, + ATTR_VERILOG_KEYWORD = 0x4, + ATTR_VERILOG_PP_DIRECTIVE = 0x8, + ATTR_VERILOG_END = 0x10, + ATTR_VERILOG_HIER = 0x20, + ATTR_VERILOG_QUALIFIER = 0x40 }; unsigned getAttrs(const FormatToken &Tok) const { auto At = KeywordAttr.find(Tok.Tok.getIdentifierInfo()); @@ -1015,8 +1188,15 @@ LIST_ADDITIONAL_KEYWORDS #undef KEYWORD - // For internal use by clang-format. 
IdentifierInfo *kw_internal_ident_after_define; + IdentifierInfo *backtick; + IdentifierInfo *backtickbacktick; + IdentifierInfo *quote; + + /// Returns \c true if \p Tok is a symbol defined here. + bool isAdditionalSymbol(const FormatToken &Tok) const { + return Tok.isOneOf(backtick, backtickbacktick, quote); + } /// Returns \c true if \p Tok is a true JavaScript identifier, returns /// \c false if it is a keyword or a pseudo keyword. @@ -1062,7 +1242,8 @@ // These are JS keywords that are lexed by LLVM/clang as keywords. return false; case tok::identifier: - return AcceptIdentifierName || !(getAttrs(Tok) & ATTR_JS_KEYWORD); + return AcceptIdentifierName || + (!isAdditionalSymbol(Tok) && !(getAttrs(Tok) & ATTR_JS_KEYWORD)); default: return isCXXKeyword(Tok); } @@ -1139,6 +1320,157 @@ } } + /// Returns whether \p Tok is a Verilog preprocessor directive. + bool isVerilogPPDirective(const FormatToken &Tok) const { + auto Info = Tok.Tok.getIdentifierInfo(); + if (!Info) + return false; + switch (Info->getPPKeywordID()) { + case tok::pp_define: + case tok::pp_else: + case tok::pp_endif: + case tok::pp_ifdef: + case tok::pp_ifndef: + case tok::pp_include: + case tok::pp_line: + case tok::pp_pragma: + case tok::pp_undef: + return true; + default: + return getAttrs(Tok) & ATTR_VERILOG_PP_DIRECTIVE; + } + } + + /// Returns whether \p Tok is a Verilog keyword that opens a block. + bool isVerilogBegin(const FormatToken &Tok) const { + // `table` is not included since it needs to be treated specially. + return !Tok.is(TT_VerilogBlockEvent) && + Tok.isOneOf(kw_begin, kw_fork, kw_generate, kw_specify); + } + + /// Returns whether \p Tok is a Verilog keyword that closes a block. 
+ bool isVerilogEnd(const FormatToken &Tok) const { + if (Tok.is(TT_VerilogBlockEvent)) + return false; + if (Tok.is(kw_join)) { + const FormatToken *Prev = Tok.getPreviousNonComment(); + return !(Prev && Prev->is(kw_rand)); + } + return getAttrs(Tok) & ATTR_VERILOG_END; + } + + /// Returns whether \p Tok is a Verilog keyword that opens a module, etc. + bool isVerilogHier(const FormatToken &Tok) const { + const FormatToken *Prev = Tok.getPreviousNonComment(); + if (Tok.is(kw_function)) + return !(Prev && Prev->is(kw_with)); + if (Tok.is(kw_property)) + return !(Prev && + Prev->isOneOf(tok::kw_restrict, kw_assert, kw_assume, kw_cover)); + return Tok.isOneOf(tok::kw_case, tok::kw_class) || + (getAttrs(Tok) & ATTR_VERILOG_HIER); + } + + /// Returns whether \p Tok is a Verilog keyword that starts a + /// structured procedure like always. + bool isVerilogStructuredProcedure(const FormatToken &Tok) const { + return Tok.isOneOf(kw_always, kw_always_comb, kw_always_ff, kw_always_latch, + kw_final, kw_forever, kw_initial); + } + + bool isVerilogQualifier(const FormatToken &Tok) const { + switch (Tok.Tok.getKind()) { + case tok::kw_extern: + case tok::kw_signed: + case tok::kw_static: + case tok::kw_unsigned: + case tok::kw_virtual: + return true; + case tok::identifier: + return getAttrs(Tok) & ATTR_VERILOG_QUALIFIER; + default: + return false; + } + } + + bool isVerilogWordOperator(const FormatToken &Tok) const { + return Tok.isOneOf(kw_before, kw_intersect, kw_dist, kw_iff, kw_inside, + kw_or, kw_with); + } + + bool isVerilogIdentifier(const FormatToken &Tok) const { + switch (Tok.Tok.getKind()) { + case tok::kw_case: + case tok::kw_class: + case tok::kw_const: + case tok::kw_continue: + case tok::kw_default: + case tok::kw_do: + case tok::kw_extern: + case tok::kw_else: + case tok::kw_enum: + case tok::kw_for: + case tok::kw_if: + case tok::kw_restrict: + case tok::kw_signed: + case tok::kw_static: + case tok::kw_struct: + case tok::kw_typedef: + case tok::kw_union: + 
case tok::kw_unsigned: + case tok::kw_virtual: + case tok::kw_while: + return false; + case tok::identifier: + return !isAdditionalSymbol(Tok) && + !(getAttrs(Tok) & ATTR_VERILOG_KEYWORD); + default: + return isCXXKeyword(Tok); + } + } + + bool isIdentifier(const FormatToken &Tok, const FormatStyle &Style) const { + // FIXME: Add cases for other languages. + switch (Style.Language) { + case FormatStyle::LK_Cpp: + return Tok.is(tok::identifier); + case FormatStyle::LK_JavaScript: + return IsJavaScriptIdentifier(Tok); + case FormatStyle::LK_Verilog: + return isVerilogIdentifier(Tok); + default: + return Tok.is(tok::identifier); + } + } + + /// Whether the token is to be treated like '#'. Because Verilog uses + /// backtick instead. + bool isPPHash(const FormatToken &Tok, const FormatStyle &Style) const { + return Style.Language == FormatStyle::LK_Verilog ? Tok.is(backtick) + : Tok.is(tok::hash); + } + + bool isPPHashHash(const FormatToken &Tok, const FormatStyle &Style) const { + return Style.Language == FormatStyle::LK_Verilog ? Tok.is(backtickbacktick) + : Tok.is(tok::hashhash); + } + + /// Whether the token begins a block. + bool isBlockBegin(const FormatToken &Tok, const FormatStyle &Style) const { + return Style.Language == FormatStyle::LK_Verilog ? isVerilogBegin(Tok) + : Tok.is(tok::l_brace); + } + + bool isEndOfLabel(const FormatToken &Tok, const FormatStyle &Style) const { + const FormatToken *Next = Tok.getNextNonComment(); + // In Verilog the colon in a default label is optional. 
+ return Tok.is(TT_GotoLabelColon) || + (Style.Language == FormatStyle::LK_Verilog && + Tok.is(tok::kw_default) && + !(Next && Next->isOneOf(tok::colon, tok::semi, kw_clocking, kw_iff, + kw_input, kw_output, kw_sequence))); + } + private: std::unordered_map KeywordAttr; diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -60,7 +60,10 @@ bool tryMergeForEach(); bool tryTransformTryUsageForC(); + bool tryMergeTokensAny(ArrayRef> Kinds, + TokenType NewType); bool tryMergeTokens(ArrayRef Kinds, TokenType NewType); + bool tryMergeTokens(size_t Count, TokenType NewType); // Returns \c true if \p Tok can only be followed by an operand in JavaScript. bool precedesOperand(FormatToken *Tok); @@ -92,6 +95,8 @@ bool tryMergeConflictMarkers(); + void resizeToken(size_t NewLen); + FormatToken *getStashedToken(); FormatToken *getNextToken(); @@ -124,6 +129,9 @@ // Targets that may appear inside a C# attribute. static const llvm::StringSet<> CSharpAttributeTargets; + /// Handle language-specific tokens. + bool readRawTokenLanguageSpecific(Token &Tok); + void readRawToken(FormatToken &Tok); void resetLexer(unsigned Offset); diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -81,9 +81,9 @@ if (Style.isJavaScript()) { tryParseJSRegexLiteral(); handleTemplateStrings(); - } - if (Style.Language == FormatStyle::LK_TextProto) + } else if (Style.Language == FormatStyle::LK_TextProto) { tryParsePythonComment(); + } tryMergePreviousTokens(); if (Style.isCSharp()) // This needs to come after tokens have been merged so that C# @@ -192,6 +192,63 @@ if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator)) return; } + + if (Style.Language == FormatStyle::LK_Verilog) { + // Merge the number following a base like `'h?a0`. 
+ if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) && + Tokens.end()[-2]->is(tok::numeric_constant) && + Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier, + tok::question) && + tryMergeTokens(2, TT_Unknown)) + return; + // Part select. + if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}}, + TT_BitFieldColon)) + return; + // Signed shift and distribution weight. + if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) { + Tokens.back()->Tok.setKind(tok::lessless); + return; + } + if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) { + Tokens.back()->Tok.setKind(tok::greatergreater); + return; + } + if (tryMergeTokensAny({{tok::lessless, tok::equal}, + {tok::lessless, tok::lessequal}, + {tok::greatergreater, tok::equal}, + {tok::greatergreater, tok::greaterequal}, + {tok::colon, tok::equal}, + {tok::colon, tok::slash}}, + TT_BinaryOperator)) { + Tokens.back()->ForcedPrecedence = prec::Assignment; + return; + } + // Signed shift, case equality, and wildcard equality. + if (tryMergeTokensAny({{tok::lessless, tok::less}, + {tok::greatergreater, tok::greater}, + {tok::exclaimequal, tok::equal}, + {tok::exclaimequal, tok::question}, + {tok::equalequal, tok::equal}, + {tok::equalequal, tok::question}}, + TT_BinaryOperator)) + return; + // Module paths in specify blocks and implications in properties. 
+ if (tryMergeTokensAny({{tok::plusequal, tok::greater}, + {tok::plus, tok::star, tok::greater}, + {tok::minusequal, tok::greater}, + {tok::minus, tok::star, tok::greater}, + {tok::equal, tok::greater}, + {tok::star, tok::greater}, + {tok::pipeequal, tok::greater}, + {tok::pipe, tok::arrow}, + {tok::hash, tok::minus, tok::hash}, + {tok::hash, tok::equal, tok::hash}}, + TT_BinaryOperator)) { + Tokens.back()->ForcedPrecedence = prec::Comma; + return; + } + } } bool FormatTokenLexer::tryMergeNSStringLiteral() { @@ -410,7 +467,8 @@ if (!Try->is(tok::kw_try)) return false; auto &Next = *(Tokens.end() - 1); - if (Next->isOneOf(tok::l_brace, tok::colon, tok::hash, tok::comment)) + if (Next->isOneOf(tok::l_brace, tok::colon, tok::comment) || + Keywords.isPPHash(*Next, Style)) return false; if (Tokens.size() > 2) { @@ -452,6 +510,14 @@ return true; } +bool FormatTokenLexer::tryMergeTokensAny( + ArrayRef> Kinds, TokenType NewType) { + return std::any_of(Kinds.begin(), Kinds.end(), + [this, NewType](ArrayRef Kinds) { + return tryMergeTokens(Kinds, NewType); + }); +} + bool FormatTokenLexer::tryMergeTokens(ArrayRef Kinds, TokenType NewType) { if (Tokens.size() < Kinds.size()) @@ -459,15 +525,28 @@ SmallVectorImpl::const_iterator First = Tokens.end() - Kinds.size(); - if (!First[0]->is(Kinds[0])) + for (unsigned i = 0; i < Kinds.size(); ++i) + if (!First[i]->is(Kinds[i])) + return false; + + return tryMergeTokens(Kinds.size(), NewType); +} + +bool FormatTokenLexer::tryMergeTokens(size_t Count, TokenType NewType) { + if (Tokens.size() < Count) return false; + + SmallVectorImpl::const_iterator First = Tokens.end() - Count; unsigned AddLength = 0; - for (unsigned i = 1; i < Kinds.size(); ++i) { - if (!First[i]->is(Kinds[i]) || First[i]->hasWhitespaceBefore()) + for (size_t i = 1; i < Count; ++i) { + // If there is whitespace separating the token and the previous one, + // they should not be merged. 
+ if (First[i]->hasWhitespaceBefore()) return false; AddLength += First[i]->TokenText.size(); } - Tokens.resize(Tokens.size() - Kinds.size() + 1); + + Tokens.resize(Tokens.size() - Count + 1); First[0]->TokenText = StringRef(First[0]->TokenText.data(), First[0]->TokenText.size() + AddLength); First[0]->ColumnWidth += AddLength; @@ -836,6 +915,59 @@ return FormatTok; } +void FormatTokenLexer::resizeToken(size_t NewLen) { + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation( + Lex->getBufferLocation() - FormatTok->TokenText.size() + NewLen))); + FormatTok->TokenText = FormatTok->TokenText.substr(0, NewLen); + FormatTok->ColumnWidth = encoding::columnWidthWithTabs( + FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth, + Encoding); + FormatTok->Tok.setLength(NewLen); +} + +/// Count the length of leading whitespace in a token. +static size_t countLeadingWhitespace(StringRef Text) { + // Basically counting the length matched by this regex. + // "^([\n\r\f\v \t]|(\\\\|\\?\\?/)[\n\r])+" + // Directly using the regex turned out to be slow. With the regex + // version formatting all files in this directory took about 1.25 + // seconds. This version took about 0.5 seconds. + bool Done = false; + const char *Cur = Text.begin(); + while (!Done && Cur < Text.end()) + switch (Cur[0]) { + case '\n': + case '\r': + case '\f': + case '\v': + case ' ': + case '\t': + ++Cur; + break; + // A '\' followed by a newline always escapes the newline, regardless + // of whether there is another '\' before it. + case '\\': + // The source has a null byte at the end. It is not necessary to + // check Cur + 1 < Text.end(). + if (Cur[1] == '\n' || Cur[1] == '\r') + Cur += 2; + else + Done = true; + break; + // Newlines can also be escaped by a '?' '?' '/' trigraph. + case '?': + if (Cur[1] == '?' 
&& Cur[2] == '/' && (Cur[3] == '\n' || Cur[3] == '\r')) + Cur += 4; + else + Done = true; + break; + default: + Done = true; + break; + } + return Cur - Text.begin(); +} + FormatToken *FormatTokenLexer::getNextToken() { if (StateStack.top() == LexerState::TOKEN_STASHED) { StateStack.pop(); @@ -850,34 +982,29 @@ IsFirstToken = false; // Consume and record whitespace until we find a significant token. + // Some tok::unknown tokens are not just whitespace, e.g. whitespace + // followed by a symbol such as backtick. Those symbols may be + // significant in other languages. unsigned WhitespaceLength = TrailingWhitespace; - while (FormatTok->is(tok::unknown)) { + while (FormatTok->isNot(tok::eof)) { + auto LeadingWhitespace = countLeadingWhitespace(FormatTok->TokenText); + if (!LeadingWhitespace) + break; + if (LeadingWhitespace < FormatTok->TokenText.size()) + resizeToken(LeadingWhitespace); StringRef Text = FormatTok->TokenText; - auto EscapesNewline = [&](int pos) { - // A '\r' here is just part of '\r\n'. Skip it. - if (pos >= 0 && Text[pos] == '\r') - --pos; - // See whether there is an odd number of '\' before this. - // FIXME: This is wrong. A '\' followed by a newline is always removed, - // regardless of whether there is another '\' before it. - // FIXME: Newlines can also be escaped by a '?' '?' '/' trigraph. - unsigned count = 0; - for (; pos >= 0; --pos, ++count) - if (Text[pos] != '\\') - break; - return count & 1; - }; - // FIXME: This miscounts tok:unknown tokens that are not just - // whitespace, e.g. a '`' character. 
+ bool InEscape = false; for (int i = 0, e = Text.size(); i != e; ++i) { switch (Text[i]) { + case '\r': + if (i + 1 < e && Text[i + 1] == '\n') + break; + LLVM_FALLTHROUGH; case '\n': ++FormatTok->NewlinesBefore; - FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1); - FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; - Column = 0; - break; - case '\r': + if (!InEscape) + FormatTok->HasUnescapedNewline = true; + InEscape = false; FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; Column = 0; break; @@ -893,24 +1020,23 @@ Style.TabWidth - (Style.TabWidth ? Column % Style.TabWidth : 0); break; case '\\': - if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n')) - FormatTok->setType(TT_ImplicitStringLiteral); + case '?': + case '/': + InEscape = true; break; default: - FormatTok->setType(TT_ImplicitStringLiteral); + // This shouldn't happen. + assert(false); break; } - if (FormatTok->getType() == TT_ImplicitStringLiteral) - break; } - - if (FormatTok->is(TT_ImplicitStringLiteral)) - break; - WhitespaceLength += FormatTok->Tok.getLength(); - + WhitespaceLength += Text.size(); readRawToken(*FormatTok); } + if (FormatTok->is(tok::unknown)) + FormatTok->setType(TT_ImplicitStringLiteral); + // JavaScript and Java do not allow to escape the end of the line with a // backslash. Backslashes are syntax errors in plain source, but can occur in // comments. 
When a single line comment ends with a \, it'll cause the next @@ -924,39 +1050,32 @@ while (BackslashPos != StringRef::npos) { if (BackslashPos + 1 < FormatTok->TokenText.size() && FormatTok->TokenText[BackslashPos + 1] == '\n') { - const char *Offset = Lex->getBufferLocation(); - Offset -= FormatTok->TokenText.size(); - Offset += BackslashPos + 1; - resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); - FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1); - FormatTok->ColumnWidth = encoding::columnWidthWithTabs( - FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth, - Encoding); + resizeToken(BackslashPos + 1); break; } BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1); } } - // In case the token starts with escaped newlines, we want to - // take them into account as whitespace - this pattern is quite frequent - // in macro definitions. - // FIXME: Add a more explicit test. - while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') { - unsigned SkippedWhitespace = 0; - if (FormatTok->TokenText.size() > 2 && - (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n')) - SkippedWhitespace = 3; - else if (FormatTok->TokenText[1] == '\n') - SkippedWhitespace = 2; - else - break; + if (Style.Language == FormatStyle::LK_Verilog) { + static const llvm::Regex NumberBase("^s?[bdho]", llvm::Regex::IgnoreCase); + SmallVector Matches; - ++FormatTok->NewlinesBefore; - WhitespaceLength += SkippedWhitespace; - FormatTok->LastNewlineOffset = SkippedWhitespace; - Column = 0; - FormatTok->TokenText = FormatTok->TokenText.substr(SkippedWhitespace); + // In Verilog the quote is not part of a number. + if (FormatTok->is(tok::numeric_constant)) { + auto Quote = FormatTok->TokenText.find('\''); + if (Quote != StringRef::npos) + resizeToken(Quote); + } + + // In Verilog in a based number literal like `'b10`, there may be + // whitespace between `'b` and `10`.
In that case reset the lexer to + // after `'b`. + if (Tokens.size() && Tokens.back()->is(Keywords.quote) && + NumberBase.match(FormatTok->TokenText, &Matches)) { + resizeToken(Matches[0].size()); + FormatTok->setType(TT_VerilogNumberBase); + } } FormatTok->WhitespaceRange = SourceRange( @@ -997,6 +1116,13 @@ StateStack.push(LexerState::TOKEN_STASHED); } + if (Style.Language == FormatStyle::LK_Verilog) { + // Mark the number following a base like `'h?a0` as a number. + if (Tokens.size() && Tokens.back()->is(TT_VerilogNumberBase) && + FormatTok->Tok.isOneOf(tok::identifier, tok::question)) + FormatTok->Tok.setKind(tok::numeric_constant); + } + // Now FormatTok is the next non-whitespace token. StringRef Text = FormatTok->TokenText; @@ -1046,8 +1172,50 @@ return FormatTok; } +bool FormatTokenLexer::readRawTokenLanguageSpecific(Token &Tok) { + if (Style.Language != FormatStyle::LK_Verilog) + return false; + + // In Verilog the quote is not a character literal. + // + // Make the backtick and double backtick identifiers to match against them + // more easily. + // + // In Verilog an escaped identifier starts with backslash and ends with + // whitespace. Unless that whitespace is an escaped newline. A backslash can + // also begin an escaped newline outside of an escaped identifier. We check + // for that outside of the Regex since we can't use negative lookahead + // assertions. Simply changing the '*' to '+' breaks stuff as the escaped + // identifier may have a length of 0 according to Section A.9.3. + // FIXME: If there is an escaped newline in the middle of an escaped + // identifier, allow for pasting the two lines together. But escaped + // identifiers usually occur only in generated code anyway.
+ static const llvm::Regex VerilogToken( + "^(\'|``?|\\\\(\\\\(\r?\n|\r)|[^[:space:]])*)"); + + SmallVector Matches; + const char *Start = Lex->getBufferLocation(); + if (!VerilogToken.match(StringRef(Start, Lex->getBuffer().end() - Start), + &Matches)) + return false; + // There is a null byte at the end of the buffer. + if (Start[0] == '\\' && (Start[1] == '\r' || Start[1] == '\n')) + return false; + size_t Len = Matches[0].size(); + + Tok.setLength(Len); + Tok.setLocation(Lex->getSourceLocation(Start, Len)); + // The kind has to be an identifier so we can match it against those + // defined in Keywords. + Tok.setKind(tok::raw_identifier); + Tok.setRawIdentifierData(Start); + Lex->skipOver(Len); + return true; +} + void FormatTokenLexer::readRawToken(FormatToken &Tok) { - Lex->LexFromRawLexer(Tok.Tok); + if (!readRawTokenLanguageSpecific(Tok.Tok)) + Lex->LexFromRawLexer(Tok.Tok); Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), Tok.Tok.getLength()); // For formatting, treat unterminated string literals like normal string diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h --- a/clang/lib/Format/TokenAnnotator.h +++ b/clang/lib/Format/TokenAnnotator.h @@ -43,6 +43,7 @@ MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), IsMultiVariableDeclStmt(false), Affected(false), LeadingEmptyLinesAffected(false), ChildrenAffected(false), + IsContinuation(Line.IsContinuation), FirstStartColumn(Line.FirstStartColumn) { assert(!Line.Tokens.empty()); @@ -139,6 +140,10 @@ /// \c True if one of this line's children intersects with an input range. bool ChildrenAffected; + /// \c True if this line should be indented by ContinuationIndent in addition + /// to the normal indention level. 
+ bool IsContinuation; + unsigned FirstStartColumn; private: diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -221,11 +221,13 @@ } bool StartsObjCMethodExpr = false; - if (FormatToken *MaybeSel = OpeningParen.Previous) { - // @selector( starts a selector. - if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && - MaybeSel->Previous->is(tok::at)) - StartsObjCMethodExpr = true; + if (Style.isCpp()) { + if (FormatToken *MaybeSel = OpeningParen.Previous) { + // @selector( starts a selector. + if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && + MaybeSel->Previous && MaybeSel->Previous->is(tok::at)) + StartsObjCMethodExpr = true; + } } if (OpeningParen.is(TT_OverloadedOperatorLParen)) { @@ -242,6 +244,9 @@ bool OperatorCalledAsMemberFunction = Prev->Previous && Prev->Previous->isOneOf(tok::period, tok::arrow); Contexts.back().IsExpression = OperatorCalledAsMemberFunction; + } else if (OpeningParen.is(TT_VerilogInstancePortLParen)) { + Contexts.back().IsExpression = true; + Contexts.back().ContextType = Context::VerilogInstancePortList; } else if (Style.isJavaScript() && (Line.startsWith(Keywords.kw_type, tok::identifier) || Line.startsWith(tok::kw_export, Keywords.kw_type, @@ -728,7 +733,8 @@ // Remember that this is a [[using ns: foo]] C++ attribute, so we // don't add a space before the colon (unlike other colons). CurrentToken->setType(TT_AttributeColon); - } else if (Left->isOneOf(TT_ArraySubscriptLSquare, + } else if (Style.isCpp() && + Left->isOneOf(TT_ArraySubscriptLSquare, TT_DesignatedInitializerLSquare)) { Left->setType(TT_ObjCMethodExpr); StartsObjCMethodExpr = true; @@ -876,6 +882,10 @@ bool consumeToken() { FormatToken *Tok = CurrentToken; next(); + // In Verilog primitives' state tables, `:`, `?`, and `-` aren't normal + // operators. 
+ if (Tok->is(TT_VerilogTableItem)) + return true; switch (Tok->Tok.getKind()) { case tok::plus: case tok::minus: @@ -909,6 +919,26 @@ Tok->setType(TT_CSharpNamedArgumentColon); break; } + } else if (Style.Language == FormatStyle::LK_Verilog && + !Tok->isOneOf(TT_BinaryOperator, TT_VerilogTableItem)) { + // The distribution weight operators are labeled + // TT_BinaryOperator by the lexer. + if (Keywords.isVerilogEnd(*Tok->Previous) || + Keywords.isVerilogBegin(*Tok->Previous)) + Tok->setType(TT_VerilogBlockLabelColon); + else if (Contexts.back().ContextKind == tok::l_square) + Tok->setType(TT_BitFieldColon); + else if (Contexts.back().ColonIsDictLiteral) + Tok->setType(TT_DictLiteral); + else if (Contexts.size() == 1) { + // In Verilog a case label doesn't have the case keyword. We + // assume a colon following an expression is a case label. + // Colons from ?: are annotated in parseConditional(). + Tok->setType(TT_GotoLabelColon); + if (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0)) + --Line.Level; + } + break; } if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) || Line.First->startsSequence(tok::kw_export, Keywords.kw_module) || @@ -971,7 +1001,8 @@ Tok->setType(TT_CtorInitializerColon); } else Tok->setType(TT_InheritanceColon); - } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next && + } else if (Style.isCpp() && canBeObjCSelectorComponent(*Tok->Previous) && + Tok->Next && (Tok->Next->isOneOf(tok::r_paren, tok::comma) || (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next && Tok->Next->Next->is(tok::colon)))) { @@ -1031,6 +1062,21 @@ Tok->setType(TT_OverloadedOperatorLParen); } + if (Style.Language == FormatStyle::LK_Verilog) { + const FormatToken *Prev = Tok->getPreviousNonComment(), *Prev2; + // Identify the parameter list and port list in a module + // instantiation. 
+ if (Prev && (Prev2 = Prev->getPreviousNonComment()) && + ((Prev->is(tok::hash) && Keywords.isVerilogIdentifier(*Prev2)) || + (Keywords.isVerilogIdentifier(*Prev) && + (Prev2->is(tok::r_paren) || + Keywords.isVerilogIdentifier(*Prev2) || + (Prev2->endsSequence(tok::comma, tok::r_paren) && + (Prev2 = Prev2->getPreviousNonComment()->MatchingParen) && + Prev2->is(TT_VerilogInstancePortLParen)))))) + Tok->setType(TT_VerilogInstancePortLParen); + } + if (!parseParens()) return false; if (Line.MustBeDeclaration && Contexts.size() == 1 && @@ -1146,24 +1192,44 @@ parseTemplateDeclaration(); break; case tok::comma: - if (Contexts.back().InCtorInitializer) + switch (Contexts.back().ContextType) { + case Context::CtorInitializer: Tok->setType(TT_CtorInitializerComma); - else if (Contexts.back().InInheritanceList) + break; + case Context::InheritanceList: Tok->setType(TT_InheritanceComma); - else if (Contexts.back().FirstStartOfName && - (Contexts.size() == 1 || startsWithInitStatement(Line))) { - Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; - Line.IsMultiVariableDeclStmt = true; + break; + case Context::VerilogInstancePortList: + Tok->setType(TT_VerilogInstancePortComma); + break; + default: + if (Style.Language == FormatStyle::LK_Verilog && Contexts.size() == 1 && + Line.startsWith(Keywords.kw_assign)) + Tok->setType(TT_VerilogAssignComma); + else if (Contexts.back().FirstStartOfName && + (Contexts.size() == 1 || startsWithInitStatement(Line))) { + Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; + Line.IsMultiVariableDeclStmt = true; + } + break; } if (Contexts.back().IsForEachMacro) Contexts.back().IsExpression = true; break; + case tok::kw_default: + if (Style.Language == FormatStyle::LK_Verilog && + Keywords.isEndOfLabel(*Tok, Style) && + (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))) + --Line.Level; + break; case tok::identifier: if (Tok->isOneOf(Keywords.kw___has_include, Keywords.kw___has_include_next)) 
parseHasInclude(); if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next && Tok->Next->isNot(tok::l_paren)) { + if (!Tok->getPreviousNonComment()) + Line.IsContinuation = true; Tok->setType(TT_CSharpGenericTypeConstraint); parseCSharpGenericTypeConstraint(); } @@ -1282,30 +1348,40 @@ // sequence. if (!CurrentToken->Tok.getIdentifierInfo()) return Type; - switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { - case tok::pp_include: - case tok::pp_include_next: - case tok::pp_import: - next(); - parseIncludeDirective(); - Type = LT_ImportStatement; - break; - case tok::pp_error: - case tok::pp_warning: - parseWarningOrError(); - break; - case tok::pp_pragma: - parsePragma(); - break; - case tok::pp_if: - case tok::pp_elif: - Contexts.back().IsExpression = true; - next(); - parseLine(); - break; - default: - break; + + // In Verilog macro expansions start with a backtick just like + // preprocessor directives. Thus we stop if the word is not a + // preprocessor directive. + if (Style.Language == FormatStyle::LK_Verilog) { + if (!Keywords.isVerilogPPDirective(*CurrentToken)) + return LT_Invalid; + } else { + switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { + case tok::pp_include: + case tok::pp_include_next: + case tok::pp_import: + next(); + parseIncludeDirective(); + Type = LT_ImportStatement; + break; + case tok::pp_error: + case tok::pp_warning: + parseWarningOrError(); + break; + case tok::pp_pragma: + parsePragma(); + break; + case tok::pp_if: + case tok::pp_elif: + Contexts.back().IsExpression = true; + next(); + parseLine(); + break; + default: + break; + } } + while (CurrentToken) { FormatToken *Tok = CurrentToken; next(); @@ -1323,17 +1399,23 @@ if (!CurrentToken) return LT_Invalid; NonTemplateLess.clear(); - if (CurrentToken->is(tok::hash)) - return parsePreprocessorDirective(); + if (Keywords.isPPHash(*CurrentToken, Style)) { + // We were not allowed to use C++17 optional yet when this was + // being written. 
So we used LT_Invalid to mark that the line is + // not a preprocessor directive. + auto Type = parsePreprocessorDirective(); + if (Type != LT_Invalid) + return Type; + } // Directly allow to 'import ' to support protocol buffer // definitions (github.com/google/protobuf) or missing "#" (either way we // should not break the line). - IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); if ((Style.Language == FormatStyle::LK_Java && CurrentToken->is(Keywords.kw_package)) || - (Info && Info->getPPKeywordID() == tok::pp_import && - CurrentToken->Next && + ((Style.isCpp() || Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_Java) && + CurrentToken->is(Keywords.kw_import) && CurrentToken->Next && CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier, tok::kw_static))) { next(); @@ -1428,9 +1510,9 @@ // recovered from an error (e.g. failure to find the matching >). if (!CurrentToken->isTypeFinalized() && !CurrentToken->isOneOf( - TT_AttributeMacro, TT_BracedListLBrace, TT_ClassLBrace, - TT_CompoundRequirementLBrace, TT_ConditionLParen, TT_EnumLBrace, - TT_FatArrow, TT_ForEachMacro, TT_FunctionLBrace, + TT_AttributeMacro, TT_BitFieldColon, TT_BracedListLBrace, + TT_ClassLBrace, TT_CompoundRequirementLBrace, TT_ConditionLParen, + TT_EnumLBrace, TT_FatArrow, TT_ForEachMacro, TT_FunctionLBrace, TT_FunctionLikeOrFreestandingMacro, TT_IfMacro, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_LambdaArrow, TT_LambdaLBrace, TT_LambdaLSquare, TT_NamespaceMacro, @@ -1439,7 +1521,9 @@ TT_RequiresClauseInARequiresExpression, TT_RequiresExpression, TT_RequiresExpressionLBrace, TT_RequiresExpressionLParen, TT_StatementAttributeLikeMacro, TT_StructLBrace, TT_TemplateString, - TT_TypenameMacro, TT_UnionLBrace, TT_UntouchableMacroFunc)) + TT_TypenameMacro, TT_UnionLBrace, TT_UntouchableMacroFunc, + TT_VerilogDimensionedTypeName, TT_VerilogNumberBase, + TT_VerilogTableItem)) CurrentToken->setType(TT_Unknown); CurrentToken->Role.reset(); 
CurrentToken->MatchingParen = nullptr; @@ -1479,13 +1563,18 @@ FormatToken *FirstStartOfName = nullptr; bool CanBeExpression = true; bool InTemplateArgument = false; - bool InCtorInitializer = false; - bool InInheritanceList = false; bool CaretFound = false; bool IsForEachMacro = false; bool InCpp11AttributeSpecifier = false; bool InCSharpAttributeSpecifier = false; bool InStructArrayInitializer = false; + bool AssignmentFound = false; + enum { + Unknown, + CtorInitializer, + InheritanceList, + VerilogInstancePortList, + } ContextType = Unknown; }; /// Puts a new \c Context onto the stack \c Contexts for the lifetime @@ -1591,15 +1680,16 @@ } else if (Current.Previous && Current.Previous->is(TT_CtorInitializerColon)) { Contexts.back().IsExpression = true; - Contexts.back().InCtorInitializer = true; + Contexts.back().ContextType = Context::CtorInitializer; } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) { - Contexts.back().InInheritanceList = true; + Contexts.back().ContextType = Context::InheritanceList; } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { for (FormatToken *Previous = Current.Previous; Previous && Previous->isOneOf(tok::star, tok::amp); Previous = Previous->Previous) Previous->setType(TT_PointerOrReference); - if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer) + if (Line.MustBeDeclaration && + Contexts.front().ContextType != Context::CtorInitializer) Contexts.back().IsExpression = false; } else if (Current.is(tok::kw_new)) { Contexts.back().CanBeExpression = false; @@ -1708,6 +1798,25 @@ Current.setType(TT_NonNullAssertion); return; } + } else if (Style.Language == FormatStyle::LK_Verilog) { + if (Keywords.isVerilogWordOperator(Current)) { + Current.setFinalizedType(TT_BinaryOperator); + Current.ForcedPrecedence = prec::Comma; + return; + } + if (Current.is(tok::l_paren) && Current.getPreviousNonComment() && + Current.getPreviousNonComment()->isOneOf(Keywords.kw_iff, + Keywords.kw_with)) { 
+ Current.setType(TT_ConditionLParen); + return; + } + // In a block event expression in a coverage event, these words + // are not like braces. + if (Current.isOneOf(Keywords.kw_begin, Keywords.kw_end) && + Contexts.back().ContextKind == tok::l_paren) { + Current.setType(TT_VerilogBlockEvent); + return; + } } // Line.MightBeFunctionDecl can only be true after the parentheses of a @@ -1727,7 +1836,8 @@ } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) { AutoFound = true; } else if (Current.is(tok::arrow) && - Style.Language == FormatStyle::LK_Java) { + (Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_Verilog)) { Current.setType(TT_LambdaArrow); } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration && Current.NestingLevel == 0 && @@ -1747,13 +1857,22 @@ Current, Contexts.back().CanBeExpression && Contexts.back().IsExpression, Contexts.back().InTemplateArgument)); - } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { + } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret) || + (Style.Language == FormatStyle::LK_Verilog && + Current.is(tok::pipe))) { Current.setType(determinePlusMinusCaretUsage(Current)); if (Current.is(TT_UnaryOperator) && Current.is(tok::caret)) Contexts.back().CaretFound = true; } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { Current.setType(determineIncrementUsage(Current)); - } else if (Current.isOneOf(tok::exclaim, tok::tilde)) { + } else if (Current.isOneOf(tok::exclaim, tok::tilde) && + !((Current.TokenText == "not" || Current.TokenText == "compl") && + (!Current.getNextNonComment() || + Current.getNextNonComment()->isOneOf( + tok::r_paren, tok::semi, tok::r_square, tok::r_brace)))) { + // If we get a unary operator written as a word where it shouldn't + // be then probably we are using plain C and it is not an + // operator. 
Current.setType(TT_UnaryOperator); } else if (Current.is(tok::question)) { if (Style.isJavaScript() && Line.MustBeDeclaration && @@ -1768,7 +1887,17 @@ (!Current.Previous || Current.Previous->isNot(tok::l_square)) && (!Current.is(tok::greater) && Style.Language != FormatStyle::LK_TextProto)) { - Current.setType(TT_BinaryOperator); + if (Style.Language == FormatStyle::LK_Verilog && + Current.is(tok::lessequal) && Contexts.size() == 1 && + !Contexts.back().AssignmentFound) { + // In Verilog `<=` can be assignment if in its own statement. + Current.ForcedPrecedence = prec::Assignment; + Current.setFinalizedType(TT_BinaryOperator); + } else { + Current.setType(TT_BinaryOperator); + } + if (Current.getPrecedence() == prec::Assignment) + Contexts.back().AssignmentFound = true; } else if (Current.is(tok::comment)) { if (Current.TokenText.startswith("/*")) if (Current.TokenText.endswith("*/")) @@ -1796,6 +1925,7 @@ if (FormatToken *BeforeParen = Current.MatchingParen->Previous) if (BeforeParen->is(tok::identifier) && !BeforeParen->is(TT_TypenameMacro) && + Style.Language != FormatStyle::LK_Verilog && BeforeParen->TokenText == BeforeParen->TokenText.upper() && (!BeforeParen->Previous || BeforeParen->Previous->ClosesTemplateDeclaration)) @@ -1827,7 +1957,7 @@ Current.Previous->isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) Current.setType(Current.Previous->getType()); - } else if (canBeObjCSelectorComponent(Current) && + } else if (Style.isCpp() && canBeObjCSelectorComponent(Current) && // FIXME(bug 36976): ObjC return types shouldn't use // TT_CastRParen. Current.Previous && Current.Previous->is(TT_CastRParen) && @@ -1839,7 +1969,8 @@ // colon after this, this is the only place which annotates the identifier // as a selector.) 
Current.setType(TT_SelectorName); - } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept, + } else if (Style.Language != FormatStyle::LK_Verilog && + Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept, tok::kw_requires) && Current.Previous && !Current.Previous->isOneOf(tok::equal, tok::at) && @@ -1866,13 +1997,49 @@ } } + /// Test whether \p Tok is a qualifier like "const". + const FormatToken *qualifierToSkipBack(const FormatToken *Tok) { + // FIXME: Before I added this function, every language except + // Javascript skipped only "const". I changed the C++ case into + // skipping all qualifiers. I left other languages still skipping + // "const" because I didn't know the correct behavior. It probably + // isn't the desired behavior. + + switch (Style.Language) { + case FormatStyle::LK_Cpp: + if (Tok->canBePointerOrReferenceQualifier()) + return Tok; + break; + case FormatStyle::LK_JavaScript: + // For javascript const can be like "let" or "var" + break; + case FormatStyle::LK_Verilog: + if (Keywords.isVerilogQualifier(*Tok)) + return Tok; + else if (Tok->is(tok::r_square)) + // if MatchingParen is nullptr then nullptr is returned as + // intended + return Tok->MatchingParen; + break; + default: + if (Tok->is(tok::kw_const)) + return Tok; + break; + } + return nullptr; + } + /// Take a guess at whether \p Tok starts a name of a function or /// variable declaration. /// /// This is a heuristic based on whether \p Tok is an identifier following /// something that is likely a type. bool isStartOfName(const FormatToken &Tok) { - if (Tok.isNot(tok::identifier) || !Tok.Previous) + // For Verilog this is handled in ExpressionParser. 
+ if (Style.Language == FormatStyle::LK_Verilog) + return false; + + if (!Keywords.isIdentifier(Tok, Style) || !Tok.Previous) return false; if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof, @@ -1881,8 +2048,11 @@ if (Style.isJavaScript() && Tok.Previous->is(Keywords.kw_in)) return false; - // Skip "const" as it does not have an influence on whether this is a name. - FormatToken *PreviousNotConst = Tok.getPreviousNonComment(); + // Skip stuff like "const" as they do not have an influence on + // whether this is a name. + const FormatToken *PreviousNotConst = Tok.getPreviousNonComment(); + if (!PreviousNotConst) + return false; // For javascript const can be like "let" or "var" if (!Style.isJavaScript()) @@ -1895,9 +2065,9 @@ if (PreviousNotConst->ClosesRequiresClause) return false; - bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && + bool IsPPKeyword = Keywords.isIdentifier(*PreviousNotConst, Style) && PreviousNotConst->Previous && - PreviousNotConst->Previous->is(tok::hash); + Keywords.isPPHash(*PreviousNotConst->Previous, Style); if (PreviousNotConst->is(TT_TemplateCloser)) return PreviousNotConst && PreviousNotConst->MatchingParen && @@ -1909,7 +2079,7 @@ PreviousNotConst->is(TT_TypeDeclarationParen)) return true; - // If is a preprocess keyword like #define. + // If the previous token is a preprocessor keyword like #define. if (IsPPKeyword) return false; @@ -2129,6 +2299,12 @@ if (Style.isCSharp() && Tok.is(tok::ampamp)) return TT_BinaryOperator; + if (Style.Language == FormatStyle::LK_Verilog) { + if (Tok.is(tok::star)) + return TT_BinaryOperator; + return determinePlusMinusCaretUsage(Tok); + } + const FormatToken *PrevToken = Tok.getPreviousNonComment(); if (!PrevToken) return TT_UnaryOperator; @@ -2275,23 +2451,37 @@ FormatToken *LatestOperator = nullptr; unsigned OperatorIndex = 0; + // Used for grouping Verilog declarations by type.
+ FormatToken *TypeFrom = nullptr; + while (Current) { + // In Verilog ports in a module header that don't have a type take the + // type of the previous one. For example, + // module a(output b, + // c, + // output d); + // In this case there need to be fake parentheses around b and c. + if (Style.Language == FormatStyle::LK_Verilog && + Precedence == prec::Comma) + TypeFrom = verilogGroupDecl(TypeFrom, LatestOperator); + // Consume operators with higher precedence. parse(Precedence + 1); + if (!Current) + break; + int CurrentPrecedence = getCurrentPrecedence(); - if (Precedence == CurrentPrecedence && Current && - Current->is(TT_SelectorName)) { + if (Precedence == CurrentPrecedence && Current->is(TT_SelectorName)) { if (LatestOperator) addFakeParenthesis(Start, prec::Level(Precedence)); Start = Current; } - // At the end of the line or when an operator with higher precedence is + // At the end of the line or when an operator with lower precedence is // found, insert fake parenthesis and return. - if (!Current || - (Current->closesScope() && + if ((Current->closesScope() && (Current->MatchingParen || Current->is(TT_TemplateString))) || (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) || (CurrentPrecedence == prec::Conditional && @@ -2311,19 +2501,25 @@ parse(); } next(); - } else { - // Operator found. - if (CurrentPrecedence == Precedence) { - if (LatestOperator) - LatestOperator->NextOperator = Current; - LatestOperator = Current; - Current->OperatorIndex = OperatorIndex; - ++OperatorIndex; - } - next(/*SkipPastLeadingComments=*/Precedence > 0); + continue; } + + // Operator found. + if (CurrentPrecedence == Precedence) { + if (LatestOperator) + LatestOperator->NextOperator = Current; + LatestOperator = Current; + Current->OperatorIndex = OperatorIndex; + ++OperatorIndex; + } + next(/*SkipPastLeadingComments=*/Precedence > 0); } + // Group variables of the same type. 
+ if (Style.Language == FormatStyle::LK_Verilog && + Precedence == prec::Comma && TypeFrom) + addFakeParenthesis(TypeFrom, prec::Comma); + if (LatestOperator && (Current || Precedence > 0)) { // The requires clauses do not neccessarily end in a semicolon or a brace, // but just go over to struct/class or a function declaration, we need to @@ -2353,43 +2549,49 @@ /// Gets the precedence (+1) of the given token for binary operators /// and other tokens that we treat like binary operators. int getCurrentPrecedence() { - if (Current) { - const FormatToken *NextNonComment = Current->getNextNonComment(); - if (Current->is(TT_ConditionalExpr)) - return prec::Conditional; - if (NextNonComment && Current->is(TT_SelectorName) && - (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) || - ((Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) && - NextNonComment->is(tok::less)))) - return prec::Assignment; - if (Current->is(TT_JsComputedPropertyName)) - return prec::Assignment; - if (Current->is(TT_LambdaArrow)) - return prec::Comma; - if (Current->is(TT_FatArrow)) - return prec::Assignment; - if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) || - (Current->is(tok::comment) && NextNonComment && - NextNonComment->is(TT_SelectorName))) - return 0; - if (Current->is(TT_RangeBasedForLoopColon)) - return prec::Comma; - if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && - Current->is(Keywords.kw_instanceof)) - return prec::Relational; - if (Style.isJavaScript() && - Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) - return prec::Relational; - if (Current->is(TT_BinaryOperator) || Current->is(tok::comma)) - return Current->getPrecedence(); - if (Current->isOneOf(tok::period, tok::arrow)) - return PrecedenceArrowAndPeriod; - if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && - Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements, - Keywords.kw_throws)) - return 0; - } + if 
(!Current) + return -1; + + const FormatToken *NextNonComment = Current->getNextNonComment(); + if (Current->is(TT_ConditionalExpr)) + return prec::Conditional; + if (NextNonComment && Current->is(TT_SelectorName) && + (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) || + ((Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) && + NextNonComment->is(tok::less)))) + return prec::Assignment; + if (Current->is(TT_JsComputedPropertyName)) + return prec::Assignment; + if (Current->is(TT_LambdaArrow)) + return prec::Comma; + if (Current->is(TT_FatArrow)) + return prec::Assignment; + if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) || + (Current->is(tok::comment) && NextNonComment && + NextNonComment->is(TT_SelectorName))) + return 0; + if (Current->is(TT_RangeBasedForLoopColon)) + return prec::Comma; + if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && + Current->is(Keywords.kw_instanceof)) + return prec::Relational; + if (Style.isJavaScript() && + Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) + return prec::Relational; + if (Current->is(TT_BinaryOperator) || Current->is(tok::comma)) + return Current->getPrecedence(); + if (Current->isOneOf(tok::period, tok::arrow)) + return PrecedenceArrowAndPeriod; + if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && + Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements, + Keywords.kw_throws)) + return 0; + // In Verilog case labels are not on separate lines straight out of + // UnwrappedLineParser. Handle them here. + if (Style.Language == FormatStyle::LK_Verilog && Current->is(tok::colon)) + return 0; + return -1; } @@ -2446,6 +2648,127 @@ Current = Current->Next; } + FormatToken *verilogGroupDecl(FormatToken *TypeFrom, + FormatToken *PreviousComma) { + if (!Current) + return nullptr; + + FormatToken *Start = Current; + + // Skip attributes. 
+ while (Start->startsSequence(tok::l_paren, tok::star)) + if (!((Start = Start->MatchingParen) && + (Start = Start->getNextNonComment()))) + return nullptr; + + FormatToken *Tok = Start; + + if (Tok->is(Keywords.kw_assign)) + Tok = Tok->getNextNonComment(); + + // Skip any type qualifiers to find the first identifier. Which may be + // either a type or the newly declared variable. Identifiers are also + // skipped in case there is some new type qualifier we mistake as + // identifier. + FormatToken *First = nullptr; + while (Tok) { + FormatToken *Next = Tok->getNextNonComment(); + + if (Keywords.isPPHash(*Tok, Style)) { + // Start of a macro expansion. Skip the macro name. + First = Tok; + if ((Tok = Next)) + Tok = Tok->getNextNonComment(); + } else if (Keywords.isPPHashHash(*Tok, Style)) { + // Concatenation. Skip. + if ((Tok = Next)) + Tok = Tok->getNextNonComment(); + } else if ((Keywords.isVerilogQualifier(*Tok) || + Keywords.isVerilogIdentifier(*Tok))) { + First = Tok; + Tok = Next; + // The name may have dots like `interface_foo.modport_foo`. + while (Tok && Tok->isOneOf(tok::period, tok::coloncolon) && + (Tok = Tok->getNextNonComment())) { + if (Keywords.isVerilogIdentifier(*Tok)) + Tok = Tok->getNextNonComment(); + } + } else if (!Next) { + Tok = nullptr; + } else if (Tok->is(tok::l_paren)) { + // Make sure the parenthesized list is a drive strength. Otherwise it + // may be a module instantiation in which case we have already found the + // instance name. 
+ if (Next->isOneOf( + Keywords.kw_highz0, Keywords.kw_highz1, Keywords.kw_large, + Keywords.kw_medium, Keywords.kw_pull0, Keywords.kw_pull1, + Keywords.kw_small, Keywords.kw_strong0, Keywords.kw_strong1, + Keywords.kw_supply0, Keywords.kw_supply1, Keywords.kw_weak0, + Keywords.kw_weak1)) { + Tok->setType(TT_VerilogStrength); + Tok = Tok->MatchingParen; + if (Tok) { + Tok->setType(TT_VerilogStrength); + Tok = Tok->getNextNonComment(); + } + } else + break; + } else if (Tok->is(tok::hash)) { + if (Next->is(tok::l_paren)) + Next = Next->MatchingParen; + if (Next) + Tok = Next->getNextNonComment(); + } else + break; + } + + // Find the second identifier. If it exists it will be the name. + FormatToken *Second = nullptr; + while (Tok && Tok->is(tok::l_square) && (Tok = Tok->MatchingParen)) + Tok = Tok->getNextNonComment(); + if (Tok && + (Keywords.isPPHash(*Tok, Style) || Keywords.isVerilogIdentifier(*Tok))) + Second = Tok; + + // If the second identifier doesn't exist and there are qualifiers, + // the type is implied. + FormatToken *TypedName = nullptr; + if (Second) { + TypedName = Second; + if (First && First->is(TT_Unknown)) + First->setType(TT_VerilogDimensionedTypeName); + } else if (First != Start) + TypedName = First; + + if (TypedName) { + // This is a declaration with a new type. + if (TypedName->is(TT_Unknown)) + TypedName->setType(TT_StartOfName); + // Group variables of the previous type. + if (TypeFrom && PreviousComma) { + PreviousComma->setType(TT_VerilogTypeComma); + addFakeParenthesis(TypeFrom, prec::Comma, PreviousComma->Previous); + } + + TypeFrom = TypedName; + + // Don't let higher precedence handle the qualifiers. For example if we + // have: + // parameter x = 0 + // We skip `parameter` here. This way the fake parentheses for the + // assignment will be around `x = 0`. 
+ while (Current && Current != TypeFrom) { + if (Current->opensScope()) { + next(); + parse(); + } + next(); + } + } + + return TypeFrom; + } + const FormatStyle &Style; const AdditionalKeywords &Keywords; const AnnotatedLine &Line; @@ -3055,14 +3378,22 @@ if (Left.Finalized) return Right.hasWhitespaceBefore(); - if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo()) - return true; // Never ever merge two identifiers. + // Never ever merge two words. getIdentifierInfo() returns non-null for + // keywords as well as identifiers. + if (Right.Tok.getIdentifierInfo() && !Keywords.isAdditionalSymbol(Right) && + Left.Tok.getIdentifierInfo() && !Keywords.isAdditionalSymbol(Left)) + return true; // Leave a space between * and /* to avoid C4138 `comment end` found outside // of comment. if (Left.is(tok::star) && Right.is(tok::comment)) return true; + // In Verilog, an escaped identifier ends with whitespace. + if (Style.Language == FormatStyle::LK_Verilog && Left.is(tok::identifier) && + Left.TokenText[0] == '\\') + return true; + if (Style.isCpp()) { // Space between import . // or import .....; @@ -3296,7 +3627,71 @@ Keywords.kw_native)) && Right.is(TT_TemplateOpener)) return true; + } else if (Style.Language == FormatStyle::LK_Verilog) { + // Add space between things in a primitive's state table unless in a + // transition like `(0?)`. + if ((Left.is(TT_VerilogTableItem) && + !Right.isOneOf(tok::r_paren, tok::semi)) || + (Right.is(TT_VerilogTableItem) && !Left.is(tok::l_paren))) { + const FormatToken *Next = Right.getNextNonComment(); + return !(Next && Next->is(tok::r_paren)); + } + // Don't add space within a delay like `#0`. The followed-by operators `#-#` + // and `#=#` match tok::hash as they take the kind of the first token when + // being merged. + if (!Left.is(TT_BinaryOperator) && + (Left.is(tok::hash) || Left.is(tok::hashhash))) + return false; + // Add space after a delay. 
+ if (!Right.is(tok::semi) && + (Left.endsSequence(tok::numeric_constant, tok::hash) || + Left.endsSequence(tok::numeric_constant, tok::hashhash) || + (Left.is(tok::r_paren) && Left.MatchingParen && + Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) + return true; + // Don't add embedded spaces in a number literal like `16'h1?ax` or an array + // literal like `'{}`. + if (Left.is(Keywords.quote) || + (Left.is(TT_VerilogNumberBase) && Right.is(tok::numeric_constant))) + return false; + // Don't add spaces between two at signs. Like in a coverage event. + // Don't add spaces between at and a sensitivity list like + // `@(posedge clk)`. + if (Left.is(tok::at) && Right.isOneOf(tok::l_paren, tok::star, tok::at)) + return false; + // Add space between the type name and dimension like `logic [1:0]`. + if (Right.is(tok::l_square) && + (Left.isOneOf(TT_VerilogDimensionedTypeName, Keywords.kw_function) || + Keywords.isVerilogQualifier(Left))) + return true; + // Don't add spaces between a casting type and the quote or repetition count + // and the brace. + if ((Right.is(Keywords.quote) || + (Right.is(BK_BracedInit) && Right.is(tok::l_brace))) && + !(Left.isOneOf(Keywords.kw_assign, Keywords.kw_unique) || + Keywords.isVerilogWordOperator(Left)) && + (Left.isOneOf(tok::r_square, tok::r_paren, tok::r_brace, + tok::numeric_constant, tok::identifier) || + Keywords.isCXXKeyword(Left))) + return false; + // Don't add spaces in imports like `import foo::*;`. + if ((Right.is(tok::star) && Left.is(tok::coloncolon)) || + (Left.is(tok::star) && Right.is(tok::semi))) + return false; + // Add space in attribute like `(* ASYNC_REG = "TRUE" *)`. + if (Left.endsSequence(tok::star, tok::l_paren) && Right.is(tok::identifier)) + return true; + // Add space before drive strength like in `wire (strong1, pull0)`. + if (Right.is(tok::l_paren) && Right.is(TT_VerilogStrength)) + return true; + // Don't add space in a streaming concatenation like `{>>{j}}`. 
+ if ((Left.is(tok::l_brace) && + Right.isOneOf(tok::lessless, tok::greatergreater)) || + (Left.endsSequence(tok::lessless, tok::l_brace) || + Left.endsSequence(tok::greatergreater, tok::l_brace))) + return false; } + if (Left.is(TT_ImplicitStringLiteral)) return Right.hasWhitespaceBefore(); if (Line.Type == LT_ObjCMethodDecl) { @@ -3334,6 +3729,8 @@ if (Right.is(tok::colon)) { if (Line.First->isOneOf(tok::kw_default, tok::kw_case)) return Style.SpaceBeforeCaseColon; + if (Right.is(TT_GotoLabelColon)) + return false; const FormatToken *Next = Right.getNextNonComment(); if (!Next || Next->is(tok::semi)) return false; @@ -3443,15 +3840,16 @@ Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen)) return false; if (Right.is(tok::less) && Left.isNot(tok::l_paren) && - Line.startsWith(tok::hash)) + (Line.Type == LT_PreprocessorDirective || + Line.Type == LT_ImportStatement)) return true; if (Right.is(TT_TrailingUnaryOperator)) return false; if (Left.is(TT_RegexLiteral)) return false; - if (Left.is(tok::kw_return) && - !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) + if (Left.is(tok::kw_return) && !(Right.isOneOf(tok::semi, tok::r_paren) || + Keywords.isPPHashHash(Right, Style))) return true; if (Style.isJson() && Left.is(tok::string_literal) && Right.is(tok::colon)) return false; @@ -3460,10 +3858,10 @@ if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && Left.Tok.getObjCKeywordID() == tok::objc_property) return true; - if (Right.is(tok::hashhash)) - return Left.is(tok::hash); - if (Left.isOneOf(tok::hashhash, tok::hash)) - return Right.is(tok::hash); + if (Keywords.isPPHashHash(Right, Style)) + return Keywords.isPPHash(Left, Style); + if (Keywords.isPPHash(Left, Style) || Keywords.isPPHashHash(Left, Style)) + return Keywords.isPPHash(Right, Style); if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) || (Left.is(tok::l_brace) && Left.isNot(BK_Block) && Right.is(tok::r_brace) && Right.isNot(BK_Block))) @@ -3492,7 +3890,7 @@ 
(Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen))) ? Style.SpacesInCStyleCastParentheses : Style.SpacesInParentheses; - if (Right.isOneOf(tok::semi, tok::comma)) + if (Right.is(tok::semi)) return false; if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) { bool IsLightweightGeneric = Right.MatchingParen && @@ -3648,7 +4046,7 @@ // dependent on PointerAlignment style. if (Previous) { if (Previous->endsSequence(tok::kw_operator)) - return Style.PointerAlignment != FormatStyle::PAS_Left; + return (Style.PointerAlignment != FormatStyle::PAS_Left); if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile)) return (Style.PointerAlignment != FormatStyle::PAS_Left) || (Style.SpaceAroundPointerQualifiers == @@ -3775,9 +4173,8 @@ if (Right.is(TT_UnaryOperator)) return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr)); - if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square, - tok::r_paren) || - Left.isSimpleTypeSpecifier()) && + if ((Left.isOneOf(tok::greater, tok::r_square, tok::r_paren) || + Keywords.isIdentifier(Left, Style) || Left.isSimpleTypeSpecifier()) && Right.is(tok::l_brace) && Right.getNextNonComment() && Right.isNot(BK_Block)) return false; @@ -3785,7 +4182,7 @@ return false; // u#str, U#str, L#str, u8#str // uR#str, UR#str, LR#str, u8R#str - if (Right.is(tok::hash) && Left.is(tok::identifier) && + if (Keywords.isPPHash(Right, Style) && Left.is(tok::identifier) && (Left.TokenText == "L" || Left.TokenText == "u" || Left.TokenText == "U" || Left.TokenText == "u8" || Left.TokenText == "LR" || Left.TokenText == "uR" || @@ -4230,6 +4627,28 @@ return true; } + if (Style.Language == FormatStyle::LK_Verilog) { + // Break between ports of different types. + if (Left.is(TT_VerilogTypeComma)) + return true; + // Break between assignments. + if (Left.is(TT_VerilogAssignComma)) + return true; + // Break between ports in a module instantiation and after the + // parameter list. 
+ if (Style.BreakBetweenInstancePorts && + (Left.is(TT_VerilogInstancePortComma) || + (Left.is(tok::r_paren) && Keywords.isVerilogIdentifier(Right) && + Left.MatchingParen && + Left.MatchingParen->is(TT_VerilogInstancePortLParen)))) + return true; + // Break after labels. Usually labels are already on their own lines in + // UnwrappedLineParser. However for Verilog some case labels can contain + // complex expressions so they are only recognized in the annotator. + if (!Keywords.isVerilogBegin(Right) && Keywords.isEndOfLabel(Left, Style)) + return true; + } + return false; } @@ -4546,7 +4965,8 @@ } void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { - llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n"; + llvm::errs() << "AnnotatedTokens(L=" << Line.Level << ", T=" << Line.Type + << ", C=" << Line.IsContinuation << "):\n"; const FormatToken *Tok = Line.First; while (Tok) { llvm::errs() << " M=" << Tok->MustBreakBefore @@ -4556,7 +4976,8 @@ << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount << " BK=" << Tok->getBlockKind() << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength - << " PPK=" << Tok->getPackingKind() << " FakeLParens="; + << " PPK=" << Tok->getPackingKind() + << " Pre=" << Tok->ForcedPrecedence << " FakeLParens="; for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) llvm::errs() << Tok->FakeLParens[i] << "/"; llvm::errs() << " FakeRParens=" << Tok->FakeRParens; diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -71,7 +71,7 @@ } if (static_cast(Indent) + Offset >= 0) Indent += Offset; - if (Line.First->is(TT_CSharpGenericTypeConstraint)) + if (Line.IsContinuation) Indent = Line.Level * Style.IndentWidth + Style.ContinuationIndentWidth; } @@ -105,7 +105,6 @@ if (Style.Language == FormatStyle::LK_Java || 
Style.isJavaScript() || Style.isCSharp()) return 0; - auto IsAccessModifier = [this, &RootToken]() { if (RootToken.isAccessSpecifier(Style.isCpp())) return true; diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -48,6 +48,10 @@ bool MustBeDeclaration; + /// \c True if this line should be indented by ContinuationIndent in + /// addition to the normal indention level. + bool IsContinuation = false; + /// If this \c UnwrappedLine closes a block in a sequence of lines, /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding /// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be @@ -61,6 +65,8 @@ static const size_t kInvalidIndex = -1; unsigned FirstStartColumn = 0; + + bool isIndented() { return Level > 1 || (!InPPDirective && Level > 0); } }; class UnwrappedLineConsumer { @@ -95,10 +101,14 @@ bool parseLevel(bool HasOpeningBrace, bool CanContainBracedList, IfStmtKind *IfKind = nullptr, TokenType NextLBracesType = TT_Unknown); - IfStmtKind parseBlock(bool MustBeDeclaration = false, unsigned AddLevels = 1u, - bool MunchSemi = true, - bool UnindentWhitesmithsBraces = false, - bool CanContainBracedList = true, + enum { + BLOCK_MUST_BE_DECLARATION = 0x1, + BLOCK_KEEP_TRAILING_SEMI = 0x2, + BLOCK_UNINDENT_WHITESMITHS_BRACES = 0x4, + BLOCK_CAN_CONTAIN_BRACED_LIST = 0x8, + BLOCK_VERILOG_HIER = 0x10 + }; + IfStmtKind parseBlock(unsigned Flags = 0u, unsigned AddLevels = 1u, TokenType NextLBracesType = TT_Unknown); void parseChildBlock(bool CanContainBracedList = true, TokenType NextLBracesType = TT_Unknown); @@ -110,6 +120,7 @@ void parsePPEndIf(); void parsePPUnknown(); void readTokenWithJavaScriptASI(); + bool parseStatementSpecial(IfStmtKind *IfKind); void parseStructuralElement(IfStmtKind *IfKind = nullptr, bool IsTopLevel = false, TokenType NextLBracesType = TT_Unknown, @@ -123,11 +134,12 @@ void 
parseUnbracedBody(bool CheckEOF = false); void handleAttributes(); bool handleCppAttributes(); - FormatToken *parseIfThenElse(IfStmtKind *IfKind, bool KeepBraces = false); + FormatToken *parseIfThenElse(IfStmtKind *IfKind, bool KeepBraces = false, + bool IsVerilogAssert = false); void parseTryCatch(); void parseIndentedBlock(bool BracesAreOptional = true, bool RBraceOnSeparateLine = true); - void parseForOrWhileLoop(); + void parseForOrWhileLoop(bool HasParens = true); void parseDoWhile(); void parseLabel(bool LeftAlignLabel = false); void parseCaseLabel(); @@ -167,11 +179,20 @@ bool tryToParsePropertyAccessor(); void tryToParseJSFunction(); bool tryToParseSimpleAttribute(); + void parseVerilogHierIdentifier(); + // Returns the number of levels to add to the normal indentation level. + unsigned parseVerilogHierHeader(); + void parseVerilogSensitivityList(); + void parseVerilogTable(); // Used by addUnwrappedLine to denote whether to keep or remove a level // when resetting the line state. enum class LineLevel { Remove, Keep }; + // Called when there should be a line break before the current + // token. For example when the current token is a semicolon and there + // should be a line break after the semicolon, call nextToken and then + // addUnwrappedline. void addUnwrappedLine(LineLevel AdjustLevel = LineLevel::Remove); bool eof() const; // LevelDifference is the difference of levels after and before the current diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -309,6 +309,20 @@ int Position; }; +// A class used to set and restore the Token position when peeking ahead +// in the token source. 
+class ScopedTokenPosition { + unsigned StoredPosition; + FormatTokenSource *Tokens; + +public: + ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { + assert(Tokens && "Tokens expected to not be null"); + StoredPosition = Tokens->getPosition(); + } + + ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } +}; } // end anonymous namespace UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, @@ -460,6 +474,7 @@ return Previous && Previous->is(tok::comment) && (Previous->IsMultiline || Previous->NewlinesBefore > 0); } + /// \brief Parses a level, that is ???. /// \param HasOpeningBrace If that level is started by an opening brace. /// \param CanContainBracedList If the content can contain (at any level) a @@ -516,15 +531,15 @@ if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) continue; - parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, - /*MunchSemi=*/true, /*UnindentWhitesmithBraces=*/false, - CanContainBracedList, + parseBlock(/*Flags=*/CanContainBracedList * BLOCK_CAN_CONTAIN_BRACED_LIST, + /*AddLevels=*/1u, /*NextLBracesType=*/NextLBracesType); ++StatementCount; assert(StatementCount > 0 && "StatementCount overflow!"); addUnwrappedLine(); break; case tok::r_brace: + case_r_brace: if (HasOpeningBrace) { if (!Style.RemoveBracesLLVM) return false; @@ -539,26 +554,30 @@ addUnwrappedLine(); break; case tok::kw_default: { - unsigned StoredPosition = Tokens->getPosition(); FormatToken *Next; - do { - Next = Tokens->getNextToken(); - assert(Next); - } while (Next->is(tok::comment)); - FormatTok = Tokens->setPosition(StoredPosition); - if (Next->isNot(tok::colon)) { - // default not followed by ':' is not a case label; treat it like - // an identifier. 
- parseStructuralElement(); + { + ScopedTokenPosition AutoPosition(Tokens); + do { + Next = Tokens->getNextToken(); + } while (Next->is(tok::comment)); + } + // default not followed by ':' is not a case label; treat it like + // an identifier. In Verilog the colon is optional. + if (Style.Language != FormatStyle::LK_Verilog && Next && + Next->isNot(tok::colon)) { + ParseDefault(); break; } // Else, if it is 'default:', fall through to the case handling. LLVM_FALLTHROUGH; } case tok::kw_case: - if (Style.isJavaScript() && Line->MustBeDeclaration) { - // A 'case: string' style field declaration. - parseStructuralElement(); + if (Style.Language == FormatStyle::LK_Verilog || + (Style.isJavaScript() && Line->MustBeDeclaration)) { + // Verilog: Case labels don't have this word. We handle case + // labels including default in TokenAnnotator. + // JavaScript: A 'case: string' style field declaration. + ParseDefault(); break; } if (!SwitchLabelEncountered && @@ -575,7 +594,14 @@ } if (handleCppAttributes()) break; - LLVM_FALLTHROUGH; + ParseDefault(); + break; + case tok::identifier: + if (Style.Language == FormatStyle::LK_Verilog && + Keywords.isVerilogEnd(*FormatTok)) + goto case_r_brace; + ParseDefault(); + break; default: ParseDefault(); break; @@ -596,6 +622,13 @@ // update information about whether an lbrace starts a // braced init list or a different block during the loop. SmallVector LBraceStack; + + if (Style.Language == FormatStyle::LK_Verilog && + Keywords.isVerilogBegin(*Tok)) { + Tok->setBlockKind(BK_Block); + return; + } + assert(Tok->is(tok::l_brace)); do { // Get next non-comment token. @@ -639,7 +672,7 @@ // Skip NextTok over preprocessor lines, otherwise we may not // properly diagnose the block as a braced intializer // if the comma separator appears after the pp directive. 
- while (NextTok->is(tok::hash)) { + while (Keywords.isPPHash(*NextTok, Style)) { ScopedMacroState MacroState(*Line, Tokens, NextTok); do { NextTok = Tokens->getNextToken(); @@ -756,28 +789,48 @@ } UnwrappedLineParser::IfStmtKind -UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, - bool MunchSemi, bool UnindentWhitesmithsBraces, - bool CanContainBracedList, +UnwrappedLineParser::parseBlock(unsigned Flags, unsigned AddLevels, TokenType NextLBracesType) { - assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && + assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || + (Style.Language == FormatStyle::LK_Verilog && + (Keywords.isVerilogBegin(*FormatTok) || + Keywords.isVerilogHier(*FormatTok)))) && "'{' or macro block token expected"); - FormatToken *Tok = FormatTok; + FormatToken *Start = FormatTok; const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); - FormatTok->setBlockKind(BK_Block); + const size_t PPStartHash = computePPHash(); - // For Whitesmiths mode, jump to the next level prior to skipping over the - // braces. - if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) + // For Whitesmiths mode, jump to the next level prior to skipping over + // the braces. + if (!(Flags & BLOCK_VERILOG_HIER) && AddLevels > 0 && + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) ++Line->Level; - size_t PPStartHash = computePPHash(); - unsigned InitialLevel = Line->Level; - nextToken(/*LevelDifference=*/AddLevels); - if (MacroBlock && FormatTok->is(tok::l_paren)) - parseParens(); + // In Verilog some blocks start with a header and no separate begin + // token. 
+ if (Flags & BLOCK_VERILOG_HIER) + AddLevels += parseVerilogHierHeader(); + else { + assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || + (Style.Language == FormatStyle::LK_Verilog && + Keywords.isVerilogBegin(*FormatTok))) && + "'{' or macro block token expected"); + + FormatTok->setBlockKind(BK_Block); + nextToken(/*LevelDifference=*/AddLevels); + // ":" name + if (Style.Language == FormatStyle::LK_Verilog && + FormatTok->is(tok::colon)) { + nextToken(); + if (Keywords.isVerilogIdentifier(*FormatTok)) + nextToken(); + } + + if (MacroBlock && FormatTok->is(tok::l_paren)) + parseParens(); + } size_t NbPreprocessorDirectives = CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; @@ -790,43 +843,53 @@ // Whitesmiths is weird here. The brace needs to be indented for the namespace // block, but the block itself may not be indented depending on the style // settings. This allows the format to back up one level in those cases. - if (UnindentWhitesmithsBraces) + if (Flags & BLOCK_UNINDENT_WHITESMITHS_BRACES) --Line->Level; ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, - MustBeDeclaration); + Flags & BLOCK_MUST_BE_DECLARATION); if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) Line->Level += AddLevels; IfStmtKind IfKind = IfStmtKind::NotIf; const bool SimpleBlock = parseLevel( - /*HasOpeningBrace=*/true, CanContainBracedList, &IfKind, NextLBracesType); + /*HasOpeningBrace=*/true, + /*CanContainBracedList=*/Flags & BLOCK_CAN_CONTAIN_BRACED_LIST, &IfKind, + NextLBracesType); if (eof()) return IfKind; - if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) - : !FormatTok->is(tok::r_brace)) { + if (!(MacroBlock ? 
FormatTok->is(TT_MacroBlockEnd) + : (Style.Language == FormatStyle::LK_Verilog && + Keywords.isVerilogEnd(*FormatTok)) || + FormatTok->is(tok::r_brace))) { Line->Level = InitialLevel; FormatTok->setBlockKind(BK_Block); return IfKind; } - if (SimpleBlock && Tok->is(tok::l_brace)) { + if (SimpleBlock && Start->is(tok::l_brace)) { assert(FormatTok->is(tok::r_brace)); const FormatToken *Previous = Tokens->getPreviousToken(); assert(Previous); if (Previous->isNot(tok::r_brace) || Previous->Optional) { - Tok->MatchingParen = FormatTok; - FormatTok->MatchingParen = Tok; + Start->MatchingParen = FormatTok; + FormatTok->MatchingParen = Start; } } size_t PPEndHash = computePPHash(); - // Munch the closing brace. nextToken(/*LevelDifference=*/-AddLevels); + // ":" name + if (Style.Language == FormatStyle::LK_Verilog && FormatTok->is(tok::colon)) { + nextToken(); + if (Keywords.isVerilogIdentifier(*FormatTok)) + nextToken(); + } + if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); @@ -842,7 +905,7 @@ parseStructuralElement(); } - if (MunchSemi && FormatTok->is(tok::semi)) + if (!(Flags & BLOCK_KEEP_TRAILING_SEMI) && FormatTok->is(tok::semi)) nextToken(); Line->Level = InitialLevel; @@ -930,7 +993,7 @@ } void UnwrappedLineParser::parsePPDirective() { - assert(FormatTok->is(tok::hash) && "'#' expected"); + assert(Keywords.isPPHash(*FormatTok, Style) && "'#' expected"); ScopedMacroState MacroState(*Line, Tokens, FormatTok); nextToken(); @@ -963,7 +1026,11 @@ parsePPEndIf(); break; default: - parsePPUnknown(); + if (Style.Language == FormatStyle::LK_Verilog && + FormatTok->is(Keywords.kw_elsif)) + parsePPElIf(); + else + parsePPUnknown(); break; } } @@ -1086,7 +1153,8 @@ IncludeGuard = IG_Defined; IncludeGuardToken = nullptr; for (auto &Line : Lines) { - if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { + const FormatToken *Tok = Line.Tokens.front().Tok; + if (!(Tok->is(tok::comment) || Keywords.isPPHash(*Tok, Style))) { IncludeGuard = IG_Rejected; 
break; } @@ -1317,18 +1385,129 @@ return addUnwrappedLine(); } -void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, - bool IsTopLevel, - TokenType NextLBracesType, - bool *HasLabel) { +bool UnwrappedLineParser::parseStatementSpecial(IfStmtKind *IfKind) { if (Style.Language == FormatStyle::LK_TableGen && FormatTok->is(tok::pp_include)) { nextToken(); if (FormatTok->is(tok::string_literal)) nextToken(); addUnwrappedLine(); - return; + return true; + } else if (Style.Language == FormatStyle::LK_Verilog) { + if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, + Keywords.kw_assume, Keywords.kw_cover)) { + parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true); + return true; + } + if (FormatTok->is(Keywords.kw_ifnone)) { + nextToken(); + parseIndentedBlock(); + return true; + } + if (Keywords.isVerilogStructuredProcedure(*FormatTok)) { + parseForOrWhileLoop(/*HasParens=*/false); + return true; + } // These words can precede `if` and `case`. + if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique, + Keywords.kw_unique0)) { + nextToken(); + return parseStatementSpecial(IfKind); + } + // In Verilog attributes like `(* attribute *)` can precede + // keywords like `if`. Skip them here like comments. + if (FormatTok->is(tok::l_paren)) { + const FormatToken *Next; + { + ScopedTokenPosition AutoPosition(Tokens); + Next = Tokens->getNextToken(); + } + if (Next && Next->is(tok::star)) { + parseParens(); + return parseStatementSpecial(IfKind); + } + } } + + // Macros that are special constructs and keywords in other languages. 
+ if (FormatTok->is(tok::identifier)) { + if (FormatTok->is(TT_ForEachMacro) || + (Style.Language == FormatStyle::LK_Verilog && + FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat))) { + parseForOrWhileLoop(); + return true; + } + if (FormatTok->is(TT_MacroBlockBegin)) { + parseBlock(BLOCK_KEEP_TRAILING_SEMI); + return true; + } + if (FormatTok->is(Keywords.kw_import)) { + if (Style.isJavaScript()) { + parseJavaScriptEs6ImportExport(); + return true; + } + if (Style.Language == FormatStyle::LK_Proto) { + nextToken(); + if (FormatTok->is(tok::kw_public)) + nextToken(); + if (!FormatTok->is(tok::string_literal)) + return true; + nextToken(); + if (FormatTok->is(tok::semi)) + nextToken(); + addUnwrappedLine(); + return true; + } + if (Style.isCpp()) { + parseModuleImport(); + return true; + } + } + if (Style.isCpp() && + FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_Q_SIGNALS, + Keywords.kw_slots, Keywords.kw_Q_SLOTS)) { + nextToken(); + if (FormatTok->is(tok::colon)) { + nextToken(); + addUnwrappedLine(); + return true; + } + } + if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { + parseStatementMacro(); + return true; + } + if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) { + parseNamespace(); + return true; + } + return false; + } + + // Not all languages have all these constructs. + bool IsKeyword; + switch (Style.Language) { + // FIXME: add cases for other languages.
+ case FormatStyle::LK_CSharp: + IsKeyword = Keywords.isCSharpKeyword(*FormatTok); + break; + case FormatStyle::LK_JavaScript: + IsKeyword = !Keywords.IsJavaScriptIdentifier(*FormatTok); + break; + case FormatStyle::LK_Verilog: + IsKeyword = !Keywords.isVerilogIdentifier(*FormatTok); + break; + case FormatStyle::LK_Json: + case FormatStyle::LK_Proto: + IsKeyword = false; + break; + case FormatStyle::LK_Cpp: + default: + IsKeyword = true; + break; + } + if (!IsKeyword) + return false; + switch (FormatTok->Tok.getKind()) { case tok::kw_asm: nextToken(); @@ -1340,16 +1519,16 @@ FormatTok->setFinalizedType(TT_InlineASMBrace); nextToken(); addUnwrappedLine(); - break; + return false; } FormatTok->Finalized = true; nextToken(); } } - break; + return false; case tok::kw_namespace: parseNamespace(); - return; + return true; case tok::kw_public: case tok::kw_protected: case tok::kw_private: @@ -1358,58 +1537,67 @@ nextToken(); else parseAccessSpecifier(); - return; + return true; case tok::kw_if: if (Style.isJavaScript() && Line->MustBeDeclaration) // field/method declaration. - break; + return false; parseIfThenElse(IfKind); - return; + return true; case tok::kw_for: case tok::kw_while: if (Style.isJavaScript() && Line->MustBeDeclaration) // field/method declaration. - break; + return false; parseForOrWhileLoop(); - return; + return true; case tok::kw_do: if (Style.isJavaScript() && Line->MustBeDeclaration) // field/method declaration. - break; + return false; parseDoWhile(); - return; + return true; case tok::kw_switch: if (Style.isJavaScript() && Line->MustBeDeclaration) // 'switch: string' field declaration. - break; + return false; parseSwitch(); - return; + return true; case tok::kw_default: + // In Verilog default along with other labels are not handled here. + if (Style.Language == FormatStyle::LK_Verilog) + return false; if (Style.isJavaScript() && Line->MustBeDeclaration) // 'default: string' field declaration. 
- break; + return false; nextToken(); if (FormatTok->is(tok::colon)) { parseLabel(); - return; + return true; } // e.g. "default void f() {}" in a Java interface. - break; + return false; case tok::kw_case: + // In Verilog switch is called case. + if (Style.Language == FormatStyle::LK_Verilog) { + parseBlock(BLOCK_VERILOG_HIER); + addUnwrappedLine(); + return true; + } if (Style.isJavaScript() && Line->MustBeDeclaration) { // 'case: string' field declaration. nextToken(); - break; + return false; } parseCaseLabel(); - return; + return true; case tok::kw_try: case tok::kw___try: if (Style.isJavaScript() && Line->MustBeDeclaration) // field/method declaration. - break; + return false; parseTryCatch(); - return; + return true; case tok::kw_extern: nextToken(); if (FormatTok->is(tok::string_literal)) { @@ -1426,83 +1614,46 @@ FormatStyle::IEBS_AfterExternBlock) ? 1u : 0u; - parseBlock(/*MustBeDeclaration=*/true, AddLevels); + parseBlock(BLOCK_MUST_BE_DECLARATION, AddLevels); addUnwrappedLine(); - return; + return true; } + } else if (Style.Language == FormatStyle::LK_Verilog && + Keywords.isVerilogHier(*FormatTok)) { + // In Verilog an extern module declaration looks like the start of a module. + // But there is no body and endmodule. So we handle it separately. + parseVerilogHierHeader(); + return true; } - break; + return false; case tok::kw_export: if (Style.isJavaScript()) { parseJavaScriptEs6ImportExport(); - return; + return true; } if (!Style.isCpp()) - break; + return false; // Handle C++ "(inline|export) namespace". 
LLVM_FALLTHROUGH; case tok::kw_inline: nextToken(); if (FormatTok->is(tok::kw_namespace)) { parseNamespace(); - return; + return true; } - break; - case tok::identifier: - if (FormatTok->is(TT_ForEachMacro)) { - parseForOrWhileLoop(); - return; - } - if (FormatTok->is(TT_MacroBlockBegin)) { - parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, - /*MunchSemi=*/false); - return; - } - if (FormatTok->is(Keywords.kw_import)) { - if (Style.isJavaScript()) { - parseJavaScriptEs6ImportExport(); - return; - } - if (Style.Language == FormatStyle::LK_Proto) { - nextToken(); - if (FormatTok->is(tok::kw_public)) - nextToken(); - if (!FormatTok->is(tok::string_literal)) - return; - nextToken(); - if (FormatTok->is(tok::semi)) - nextToken(); - addUnwrappedLine(); - return; - } - if (Style.isCpp()) { - parseModuleImport(); - return; - } - } - if (Style.isCpp() && - FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_Q_SIGNALS, - Keywords.kw_slots, Keywords.kw_Q_SLOTS)) { - nextToken(); - if (FormatTok->is(tok::colon)) { - nextToken(); - addUnwrappedLine(); - return; - } - } - if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { - parseStatementMacro(); - return; - } - if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) { - parseNamespace(); - return; - } - // In all other cases, parse the declaration. 
- break; + return false; default: - break; + return false; } +} + +void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind, + bool IsTopLevel, + TokenType NextLBracesType, + bool *HasLabel) { + if (parseStatementSpecial(IfKind)) + return; + do { const FormatToken *Previous = FormatTok->Previous; switch (FormatTok->Tok.getKind()) { @@ -1512,7 +1663,8 @@ nextToken(); parseBracedList(); break; - } else if (Style.Language == FormatStyle::LK_Java && + } else if ((Style.Language == FormatStyle::LK_Java || + Style.Language == FormatStyle::LK_CSharp) && FormatTok->is(Keywords.kw_interface)) { nextToken(); break; @@ -1593,8 +1745,8 @@ // enum definition can start a structural element. if (!parseEnum()) break; - // This only applies for C++. - if (!Style.isCpp()) { + // This only applies for C++ and Verilog. + if (!(Style.isCpp() || Style.Language == FormatStyle::LK_Verilog)) { addUnwrappedLine(); return; } @@ -1607,9 +1759,15 @@ Keywords.kw_NS_CLOSED_ENUM)) parseEnum(); break; + case tok::kw_class: + if (Style.Language == FormatStyle::LK_Verilog) { + parseBlock(BLOCK_VERILOG_HIER); + addUnwrappedLine(); + return; + } + LLVM_FALLTHROUGH; case tok::kw_struct: case tok::kw_union: - case tok::kw_class: if (parseStructLike()) return; break; @@ -1630,6 +1788,7 @@ addUnwrappedLine(); return; case tok::r_brace: + case_r_brace: addUnwrappedLine(); return; case tok::l_paren: { @@ -1740,12 +1899,40 @@ return; } - if (FormatTok->is(Keywords.kw_interface)) { + if (Style.Language == FormatStyle::LK_CSharp && + FormatTok->is(Keywords.kw_interface)) { if (parseStructLike()) return; break; } + if (Style.Language == FormatStyle::LK_Verilog) { + if (Keywords.isVerilogEnd(*FormatTok)) + goto case_r_brace; + if (FormatTok->is(Keywords.kw_table)) { + parseVerilogTable(); + return; + } + if (FormatTok->is(Keywords.kw_constraint)) { + nextToken(); + parseVerilogHierIdentifier(); + if (FormatTok->is(tok::l_brace)) + parseBlock(); + addUnwrappedLine(); + return; + } + if 
(Keywords.isVerilogBegin(*FormatTok)) { + parseBlock(); + addUnwrappedLine(); + return; + } + if (Keywords.isVerilogHier(*FormatTok)) { + parseBlock(BLOCK_VERILOG_HIER); + addUnwrappedLine(); + return; + } + } + if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { parseStatementMacro(); return; @@ -1763,9 +1950,30 @@ break; TokenCount = Line->Tokens.size(); - if (TokenCount == 1 || - (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) { - if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) { + + // Determine whether the line might be a single macro expansion. + // In Verilog macro expansions begin with a backtick. + bool IsSingleMacro = true; + { + const UnwrappedLineNode *Tok = &Line->Tokens.front(), + *End = Tok + TokenCount; + while (Tok != End && Tok->Tok->is(tok::comment)) + ++Tok; + if (Style.Language == FormatStyle::LK_Verilog) { + if (Tok != End && Tok->Tok->is(Keywords.backtick)) + ++Tok; + else + IsSingleMacro = false; + } + if (End - Tok != 1) + IsSingleMacro = false; + } + + if (IsSingleMacro) { + // In Verilog labels can be any expression, so we don't do them + // here. + if (Style.Language != FormatStyle::LK_Verilog && + FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { Line->Tokens.begin()->Tok->MustBreakBefore = true; parseLabel(!Style.IndentGotoLabels); if (HasLabel) @@ -1827,6 +2035,42 @@ break; parseCaseLabel(); break; + case tok::kw_default: + nextToken(); + if (Style.Language != FormatStyle::LK_Verilog || + FormatTok->is(tok::colon)) + break; + // In Verilog the default label doesn't need the colon. But the + // default keyword also occurs in clocking blocks. + if (FormatTok->is(Keywords.kw_clocking)) { + parseBlock(BLOCK_VERILOG_HIER); + addUnwrappedLine(); + return; + } + goto handle_verilog_case_label; + case tok::colon: { + // In Verilog case labels are complicated so we look for + // constructs following all colons. 
+ nextToken(); + if (Style.Language != FormatStyle::LK_Verilog) + break; + handle_verilog_case_label: + // The label will get unindented in AnnotatingParser. If there are + // no leading spaces, indent the rest here. We don't use + // parseLabel because we don't know whether this colon is a label + // or a ternary expression at this point. + auto OrigLevel = Line->Level; + auto FirstLine = CurrentLines->size(); + if (!Line->isIndented()) + ++Line->Level; + else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) + --Line->Level; + parseStructuralElement(IfKind, IsTopLevel, NextLBracesType); + if (CurrentLines->size() > FirstLine) + (*CurrentLines)[FirstLine].Level = OrigLevel; + Line->Level = OrigLevel; + return; + } default: nextToken(); break; @@ -1893,7 +2137,7 @@ return true; case tok::l_brace: ++Line->Level; - parseBlock(/*MustBeDeclaration=*/true); + parseBlock(BLOCK_MUST_BE_DECLARATION); addUnwrappedLine(); --Line->Level; break; @@ -2412,9 +2656,24 @@ } FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, - bool KeepBraces) { - assert(FormatTok->is(tok::kw_if) && "'if' expected"); + bool KeepBraces, + bool IsVerilogAssert) { + assert(FormatTok->isOneOf(tok::kw_if, Keywords.kw_assert, Keywords.kw_assume, + Keywords.kw_cover) && + "'if' expected"); nextToken(); + + if (IsVerilogAssert) { + // `#0` or `final` + if (FormatTok->is(tok::hash)) { + nextToken(); + if (FormatTok->is(tok::numeric_constant)) + nextToken(); + } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property, + Keywords.kw_sequence)) + nextToken(); + } + if (FormatTok->is(tok::exclaim)) nextToken(); if (FormatTok->is(tok::kw_consteval)) { @@ -2428,21 +2687,29 @@ } } handleAttributes(); + // In Verilog assert statements the if action is optional. 
+ if (IsVerilogAssert && FormatTok->is(tok::semi)) { + nextToken(); + addUnwrappedLine(); + return nullptr; + } bool NeedsUnwrappedLine = false; keepAncestorBraces(); - FormatToken *IfLeftBrace = nullptr; IfStmtKind IfBlockKind = IfStmtKind::NotIf; - if (FormatTok->is(tok::l_brace)) { + if (Keywords.isBlockBegin(*FormatTok, Style)) { IfLeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); + IfLeftBrace = FormatTok; IfBlockKind = parseBlock(); if (Style.BraceWrapping.BeforeElse) addUnwrappedLine(); else NeedsUnwrappedLine = true; + } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) { + addUnwrappedLine(); } else { parseUnbracedBody(); } @@ -2465,13 +2732,14 @@ } nextToken(); handleAttributes(); - if (FormatTok->is(tok::l_brace)) { + if (Keywords.isBlockBegin(*FormatTok, Style)) { ElseLeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); + ElseLeftBrace = FormatTok; if (parseBlock() == IfStmtKind::IfOnly) Kind = IfStmtKind::IfElseIf; addUnwrappedLine(); - } else if (FormatTok->is(tok::kw_if)) { + } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) { FormatToken *Previous = Tokens->getPreviousToken(); const bool IsPrecededByComment = Previous && Previous->is(tok::comment); if (IsPrecededByComment) { @@ -2658,9 +2926,9 @@ if (ManageWhitesmithsBraces) ++Line->Level; - parseBlock(/*MustBeDeclaration=*/true, AddLevels, - /*MunchSemi=*/true, - /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces); + parseBlock(BLOCK_MUST_BE_DECLARATION | (ManageWhitesmithsBraces * + BLOCK_UNINDENT_WHITESMITHS_BRACES), + AddLevels); // Munch the semicolon after a namespace. This is more common than one would // think. Putting the semicolon into its own line is very ugly. 
@@ -2719,7 +2987,7 @@ keepAncestorBraces(); - if (FormatTok->is(tok::l_brace)) { + if (Keywords.isBlockBegin(*FormatTok, Style)) { FormatToken *LeftBrace = FormatTok; parseBlock(); if (BracesAreOptional && Style.RemoveBracesLLVM) { @@ -2737,23 +3005,55 @@ NestedTooDeep.pop_back(); } -void UnwrappedLineParser::parseForOrWhileLoop() { - assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && +void UnwrappedLineParser::parseVerilogSensitivityList() { + if (!FormatTok->is(tok::at)) + return; + nextToken(); + // A block event expression has 2 at signs. + if (FormatTok->is(tok::at)) + nextToken(); + switch (FormatTok->Tok.getKind()) { + case tok::star: + nextToken(); + break; + case tok::l_paren: + parseParens(); + break; + default: + parseVerilogHierIdentifier(); + break; + } +} + +void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) { + assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) || + (Style.Language == FormatStyle::LK_Verilog && + FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb, + Keywords.kw_always_ff, Keywords.kw_always_latch, + Keywords.kw_final, Keywords.kw_initial, + Keywords.kw_foreach, Keywords.kw_forever, + Keywords.kw_repeat))) && "'for', 'while' or foreach macro expected"); // Those that begin with a for require special treatment because inside the - // parentheses is not an expression. - bool IsFor = FormatTok->is(tok::kw_for) || FormatTok->is(TT_ForEachMacro); + // parentheses is not an expression. In Verilog following `foreach` is an + // expression. + bool IsFor = FormatTok->is(tok::kw_for) || + (Style.Language != FormatStyle::LK_Verilog && + FormatTok->is(TT_ForEachMacro)); nextToken(); // JS' for await ( ... 
if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) nextToken(); if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) nextToken(); - if (FormatTok->is(tok::l_paren)) { + if (HasParens && FormatTok->is(tok::l_paren)) { if (!IsFor) FormatTok->setType(TT_ConditionLParen); parseParens(); } + // event control + if (Style.Language == FormatStyle::LK_Verilog) + parseVerilogSensitivityList(); parseIndentedBlock(/*BracesAreOptional=*/true); } @@ -2781,6 +3081,9 @@ } void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { + // End-of-file may occur if we are parsing an incomplete case label in + // a macro. + assert(FormatTok->isOneOf(tok::eof, tok::colon)); nextToken(); unsigned OldLineLevel = Line->Level; if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) @@ -3282,7 +3585,14 @@ while (FormatTok->Tok.getIdentifierInfo() || FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, tok::greater, tok::comma, tok::question)) { - nextToken(); + if (Style.Language == FormatStyle::LK_Verilog) { + FormatTok->setType(TT_VerilogDimensionedTypeName); + nextToken(); + // In Verilog the base type can have dimensions. + while (FormatTok->is(tok::l_square)) + parseSquare(); + } else + nextToken(); // We can have macros or attributes in between 'enum' and the enum name. if (FormatTok->is(tok::l_paren)) parseParens(); @@ -3307,7 +3617,7 @@ return true; } if (Style.Language == FormatStyle::LK_Proto) { - parseBlock(/*MustBeDeclaration=*/true); + parseBlock(BLOCK_MUST_BE_DECLARATION); return true; } @@ -3351,23 +3661,6 @@ return false; } -namespace { -// A class used to set and restore the Token position when peeking -// ahead in the token source. 
-class ScopedTokenPosition { - unsigned StoredPosition; - FormatTokenSource *Tokens; - -public: - ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { - assert(Tokens && "Tokens expected to not be null"); - StoredPosition = Tokens->getPosition(); - } - - ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } -}; -} // namespace - // Look to see if we have [[ by looking ahead, if // its not then rewind to the original position. bool UnwrappedLineParser::tryToParseSimpleAttribute() { @@ -3431,8 +3724,7 @@ while (FormatTok) { if (FormatTok->is(tok::l_brace)) { // Parse the constant's class body. - parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, - /*MunchSemi=*/false); + parseBlock(BLOCK_MUST_BE_DECLARATION | BLOCK_KEEP_TRAILING_SEMI); } else if (FormatTok->is(tok::l_paren)) { parseParens(); } else if (FormatTok->is(tok::comma)) { @@ -3464,11 +3756,14 @@ // The actual identifier can be a nested name specifier, and in macros // it is often token-pasted. // An [[attribute]] can be before the identifier. 
- while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, + while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw___attribute, tok::kw___declspec, tok::kw_alignas, tok::l_square, tok::r_square) || + Keywords.isPPHashHash(*FormatTok, Style) || ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && - FormatTok->isOneOf(tok::period, tok::comma))) { + FormatTok->isOneOf(tok::period, tok::comma)) || + (Style.Language == FormatStyle::LK_Verilog && + FormatTok->is(Keywords.backtick))) { if (Style.isJavaScript() && FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { // JavaScript/TypeScript supports inline object types in @@ -3480,9 +3775,16 @@ continue; } } - bool IsNonMacroIdentifier = - FormatTok->is(tok::identifier) && - FormatTok->TokenText != FormatTok->TokenText.upper(); + bool IsNonMacroIdentifier; + if (!FormatTok->is(tok::identifier)) + IsNonMacroIdentifier = false; + else if (Style.Language == FormatStyle::LK_Verilog) { + // In Verilog macro expansions start with backtick. + const FormatToken *Prev = Tokens->getPreviousToken(); + IsNonMacroIdentifier = !(Prev && Prev->is(Keywords.backtick)); + } else + IsNonMacroIdentifier = + FormatTok->TokenText != FormatTok->TokenText.upper(); nextToken(); // We can have macros or attributes in between 'class' and the class name. if (!IsNonMacroIdentifier) { @@ -3561,7 +3863,8 @@ addUnwrappedLine(); unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; - parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); + parseBlock(BLOCK_MUST_BE_DECLARATION | BLOCK_KEEP_TRAILING_SEMI, + AddLevels); } } // There is no addUnwrappedLine() here so that we fall through to parsing a @@ -3651,7 +3954,7 @@ if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterObjCDeclaration) addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/true); + parseBlock(BLOCK_MUST_BE_DECLARATION); } // With instance variables, this puts '}' on its own line. 
Without instance @@ -3773,6 +4076,135 @@ addUnwrappedLine(); } +void UnwrappedLineParser::parseVerilogHierIdentifier() { + // consume things like a::`b.c[d:e] or a::* + while (1) { + if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, + tok::coloncolon, Keywords.backtick) || + Keywords.isVerilogIdentifier(*FormatTok)) + nextToken(); + else if (FormatTok->is(tok::l_square)) + parseSquare(); + else + break; + } +} + +unsigned UnwrappedLineParser::parseVerilogHierHeader() { + unsigned AddLevels = 0; + + if (FormatTok->is(Keywords.kw_clocking)) { + nextToken(); + if (Keywords.isVerilogIdentifier(*FormatTok)) + nextToken(); + parseVerilogSensitivityList(); + if (FormatTok->is(tok::semi)) + nextToken(); + } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, + Keywords.kw_casez, Keywords.kw_randcase, + Keywords.kw_randsequence)) { + AddLevels += Style.IndentCaseLabels; + nextToken(); + if (FormatTok->is(tok::l_paren)) { + FormatTok->setType(TT_ConditionLParen); + parseParens(); + } + if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) + nextToken(); + // The case header has no semicolon. + } else { + // "module" etc. 
+ nextToken(); + // all the words like the name of the module and specifiers like + // "automatic" and the width of function return type + while (true) { + if (FormatTok->is(tok::l_square)) { + auto Prev = FormatTok->getPreviousNonComment(); + if (Prev && Keywords.isVerilogIdentifier(*Prev)) + Prev->setType(TT_VerilogDimensionedTypeName); + parseSquare(); + } else if (Keywords.isVerilogIdentifier(*FormatTok) || + FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) + nextToken(); + else + break; + } + + auto NewLine = [this]() { + addUnwrappedLine(); + Line->IsContinuation = true; + }; + + // package imports + while (FormatTok->is(Keywords.kw_import)) { + NewLine(); + nextToken(); + parseVerilogHierIdentifier(); + if (FormatTok->is(tok::semi)) + nextToken(); + } + + // parameters and ports + if (FormatTok->is(tok::hash)) { + NewLine(); + nextToken(); + if (FormatTok->is(tok::l_paren)) + parseParens(); + } + if (FormatTok->is(tok::l_paren)) { + NewLine(); + parseParens(); + } + + // extends and implements + if (FormatTok->is(Keywords.kw_extends)) { + NewLine(); + nextToken(); + parseVerilogHierIdentifier(); + if (FormatTok->is(tok::l_paren)) + parseParens(); + } + if (FormatTok->is(Keywords.kw_implements)) { + NewLine(); + do { + nextToken(); + parseVerilogHierIdentifier(); + } while (FormatTok->is(tok::comma)); + } + + // Coverage event for cover groups. 
+ if (FormatTok->is(tok::at)) { + NewLine(); + parseVerilogSensitivityList(); + } + + if (FormatTok->is(tok::semi)) + nextToken(/*LevelDifference=*/1); + addUnwrappedLine(); + } + + return AddLevels; +} + +void UnwrappedLineParser::parseVerilogTable() { + assert(FormatTok->is(Keywords.kw_table)); + nextToken(/*LevelDifference=*/1); + addUnwrappedLine(); + + unsigned InitialLevel = Line->Level++; + while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { + FormatToken *Tok = FormatTok; + nextToken(); + if (Tok->is(tok::semi)) + addUnwrappedLine(); + else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) + Tok->setType(TT_VerilogTableItem); + } + Line->Level = InitialLevel; + nextToken(/*LevelDifference=*/-1); + addUnwrappedLine(); +} + LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix = "") { llvm::dbgs() << Prefix << "Line(" << Line.Level @@ -3810,6 +4242,7 @@ Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; Line->FirstStartColumn = 0; + Line->IsContinuation = false; if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) --Line->Level; @@ -4062,7 +4495,9 @@ FirstNonCommentOnLine, *FormatTok, PreviousWasComment); PreviousWasComment = FormatTok->is(tok::comment); - while (!Line->InPPDirective && FormatTok->is(tok::hash) && + while (!Line->InPPDirective && Keywords.isPPHash(*FormatTok, Style) && + (Style.Language != FormatStyle::LK_Verilog || + Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && FirstNonCommentOnLine) { distributeComments(Comments, FormatTok); Comments.clear(); diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -790,7 +790,10 @@ return Style.AlignConsecutiveAssignments.AlignCompound ? 
C.Tok->getPrecedence() == prec::Assignment - : C.Tok->is(tok::equal); + : C.Tok->is(tok::equal) || + (Style.Language == FormatStyle::LK_Verilog && + C.Tok->is(tok::lessequal) && + C.Tok->getPrecedence() == prec::Assignment); }, Changes, /*StartAt=*/0, Style.AlignConsecutiveAssignments, /*RightJustify=*/true); diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -75,12 +75,10 @@ cl::init(clang::format::DefaultFallbackStyle), cl::cat(ClangFormatCategory)); -static cl::opt AssumeFileName( - "assume-filename", - cl::desc("Override filename used to determine the language.\n" - "When reading from stdin, clang-format assumes this\n" - "filename to determine the language."), - cl::init(""), cl::cat(ClangFormatCategory)); +static cl::opt + AssumeFileName("assume-filename", + cl::desc(clang::format::getAssumeFilenameHelp()), + cl::init(""), cl::cat(ClangFormatCategory)); static cl::opt Inplace("i", cl::desc("Inplace edit s, if specified."), diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt --- a/clang/unittests/Format/CMakeLists.txt +++ b/clang/unittests/Format/CMakeLists.txt @@ -17,6 +17,7 @@ FormatTestSelective.cpp FormatTestTableGen.cpp FormatTestTextProto.cpp + FormatTestVerilog.cpp MacroExpanderTest.cpp NamespaceEndCommentsFixerTest.cpp QualifierFixerTest.cpp diff --git a/clang/unittests/Format/FormatTestUtils.h b/clang/unittests/Format/FormatTestUtils.h --- a/clang/unittests/Format/FormatTestUtils.h +++ b/clang/unittests/Format/FormatTestUtils.h @@ -19,7 +19,7 @@ namespace format { namespace test { -inline std::string messUp(llvm::StringRef Code) { +inline std::string messUp(llvm::StringRef Code, bool HandleHash = true) { std::string MessedUp(Code.str()); bool InComment = false; bool InPreprocessorDirective = false; @@ -29,7 +29,7 @@ if (JustReplacedNewline) MessedUp[i - 1] = '\n'; 
InComment = true; - } else if (MessedUp[i] == '#' && + } else if (HandleHash && MessedUp[i] == '#' && (JustReplacedNewline || i == 0 || MessedUp[i - 1] == '\n')) { if (i != 0) MessedUp[i - 1] = '\n'; diff --git a/clang/unittests/Format/FormatTestVerilog.cpp b/clang/unittests/Format/FormatTestVerilog.cpp new file mode 100644 --- /dev/null +++ b/clang/unittests/Format/FormatTestVerilog.cpp @@ -0,0 +1,473 @@ +//===- unittest/Format/FormatTestVerilog.cpp ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FormatTestUtils.h" +#include "clang/Format/Format.h" +#include "llvm/Support/Debug.h" +#include "gtest/gtest.h" + +#define DEBUG_TYPE "format-test" + +namespace clang { +namespace format { + +class FormatTestVerilog : public ::testing::Test { +protected: + static std::string format(llvm::StringRef Code, unsigned Offset, + unsigned Length, const FormatStyle &Style) { + LLVM_DEBUG(llvm::errs() << "---\n"); + LLVM_DEBUG(llvm::errs() << Code << "\n\n"); + std::vector Ranges(1, tooling::Range(Offset, Length)); + tooling::Replacements Replaces = reformat(Style, Code, Ranges); + auto Result = applyAllReplacements(Code, Replaces); + EXPECT_TRUE(static_cast(Result)); + LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n"); + return *Result; + } + + static std::string format(llvm::StringRef Code, const FormatStyle &Style) { + return format(Code, 0, Code.size(), Style); + } + + static void verifyFormat( + llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle(FormatStyle::LK_Verilog)) { + EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable"; + EXPECT_EQ(Code.str(), + format(test::messUp(Code, /*HandleHash=*/false), Style)); + } +}; + +TEST_F(FormatTestVerilog, 
Align) { + FormatStyle Style = getLLVMStyle(FormatStyle::LK_Verilog); + Style.AlignConsecutiveAssignments.Enabled = true; + verifyFormat("x <= x;\n" + "sfdbddfbdfbb <= x;\n" + "x = x;\n", + Style); + verifyFormat("x = x;\n" + "sfdbddfbdfbb = x;\n" + "x = x;\n", + Style); +} + +TEST_F(FormatTestVerilog, AlignPorts) { + verifyFormat("module x\n" + " (output wire logic signed [1 : 0][0 : 1] " + "xxxxxxxxxxxxxxxxxxxxxxxxxxx[1],\n" + " " + "xxxxxxxxxxxxxxxxxxxx[0 : 2],\n" + " var `x `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,\n" + " xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx);\n" + "endmodule\n"); +} + +TEST_F(FormatTestVerilog, Always) { + verifyFormat("always\n" + " x = x;\n" + "always_ff @(posedge x)\n" + " x = x;\n" + "always @*\n" + " x = x;\n"); +} + +TEST_F(FormatTestVerilog, Assign) { + verifyFormat("assign (strong1, pull0) mynet = enable,\n" + " x = x;\n" + "assign x = x;\n"); +} + +TEST_F(FormatTestVerilog, Attribute) { + verifyFormat("(* ASYNC_REG = \"TRUE\" *) reg sync_0, sync_1;\n"); +} + +TEST_F(FormatTestVerilog, BasedLiteral) { + verifyFormat("x = '0;\n" + "x = '1;\n" + "x = 'X;\n" + "x = 'x;\n" + "x = 'Z;\n" + "x = 'z;\n" + "x = 659;\n" + "x = 'h837ff;\n" + "x = 'o7460;\n" + "x = 4'b1001;\n" + "x = 5'D3;\n" + "x = 3'b01x;\n" + "x = 12'hx;\n" + "x = 16'hz;\n" + "x = -8'd6;\n" + "x = 4'shf;\n" + "x = -4'sd15;\n" + "x = 16'sd?;\n"); +} + +TEST_F(FormatTestVerilog, Case) { + verifyFormat("case (data)\n" + " 16'd0:\n" + " result = 10'b0111111111;\n" + " 16'd1: fork\n" + " result = 10'b1011111111;\n" + " join\n" + " default\n" + " result = 'x;\n" + "endcase\n" + "casez (ir)\n" + " 8'b1???????:\n" + " instruction1(ir);\n" + " 8'b01??????:\n" + " instruction2(ir);\n" + " x ? 8'b1??????? 
: 1:\n" + " instruction3(ir);\n" + " default:\n" + " instruction4(ir);\n" + "endcase\n" + "priority casez (a)\n" + " 3'b00?:\n" + " $display(\"0 or 1\");\n" + " 3'b0??:\n" + " $display(\"2 or 3\");\n" + "endcase\n" + "(* full_case *) unique casex (x)\n" + " 0:\n" + " x = 0;\n" + "endcase\n" + "(* full_case *) case (x)\n" + " 0:\n" + " x = 0;\n" + "endcase\n"); +} + /* Cast expressions: a type keyword or a constant width followed by an apostrophe and a parenthesized operand; the first case uses a concatenation as the operand. NOTE(review): verifyFormat is a fixture helper defined outside this chunk; presumably it asserts that the given text is already in formatted form. Confirm against the fixture. */ +TEST_F(FormatTestVerilog, Cast) { + verifyFormat("x = signed'({x, x{x}, {<<{x}}});\n"); + verifyFormat("x = logic'(x);\n"); + verifyFormat("x = 1'(x);\n"); +} + /* A checker declaration with a port list, an always_ff block and a covergroup holding two labelled coverpoints, closed by a labelled endchecker. */ +TEST_F(FormatTestVerilog, Checker) { + verifyFormat("checker my_check\n" + " (logic clk, active);\n" + " bit active_d1 = 1'b0;\n" + " always_ff @(posedge clk) begin\n" + " active_d1 <= active;\n" + " end\n" + " covergroup cg_active\n" + " @(posedge clk);\n" + " cp_active:\n" + " coverpoint active {\n" + " bins idle = {1'b0};\n" + " bins active = {1'b1};\n" + " }\n" + " cp_active_d1:\n" + " coverpoint active_d1 {\n" + " bins idle = {1'b0};\n" + " bins active = {1'b1};\n" + " }\n" + " option.per_instance = 1;\n" + " endgroup\n" + " cg_active cg_active_1 = new ();\n" + "endchecker : my_check\n"); +} + /* Two constraint blocks named with the C:: scope prefix, each written on a single line together with its braces. */ +TEST_F(FormatTestVerilog, Constraint) { + verifyFormat("constraint C::proto1 { x inside {-4, 5, 7}; }\n" + "constraint C::proto2 { x >= 0; }\n"); +} + /* The six equality operators: ==, ===, ==?, !=, !== and !=?. */ +TEST_F(FormatTestVerilog, Equality) { + verifyFormat("x = x == 1;\n" + "x = x === 1;\n" + "x = x ==? 1;\n" + "x = x != 1;\n" + "x = x !== 1;\n" + "x = x !=?
1;\n"); +} + /* Escaped identifiers: names introduced by a backslash that contain punctuation, used as assignment targets and as operands inside an expression. */ +TEST_F(FormatTestVerilog, EscapedIdentifier) { + verifyFormat("\\{{{((((( = \\;;;; ;\n" + "\\busa+index = \\-clock ;\n" + "x = (\\***error-condition*** - \\net1/\\net2 << \\{a,b} ) + " + "\\a*(b+c) ;\n"); +} + /* extern declarations of a module and a class, followed by an extern module with a parameter list and a port list on their own lines. */ +TEST_F(FormatTestVerilog, Extern) { + verifyFormat("extern module x;\n" + "extern class x;\n" + "extern module x\n" + " #(parameter x)\n" + " (output x);\n"); +} + /* Nested loop headers without begin/end blocks: foreach, repeat, while and forever wrapped around one assignment. */ +TEST_F(FormatTestVerilog, Foreach) { + verifyFormat("foreach (x[x])\n" + " repeat (x)\n" + " while (x)\n" + " forever\n" + " x = x;\n"); +} + /* A module header whose parameter list and port list contain trailing line comments after some entries. */ +TEST_F(FormatTestVerilog, GroupPorts) { + verifyFormat("module x\n" + " #(parameter bit signed [1 : 0] x = 0, //\n" + " y = 0)\n" + " (output x, //\n" + " y,\n" + " var xx);\n" + "endmodule\n"); +} + /* Conditional and assertion statements: if with the priority and unique0 qualifiers, and assert, assume and cover (including the property and final forms) with else branches. */ +TEST_F(FormatTestVerilog, If) { + verifyFormat("if (x)\n" + " priority if (x)\n" + " unique0 if (x)\n" + " assert #0 (x)\n" + " $display;\n" + " else\n" + " assume (x)\n" + " else\n" + " cover (x)\n" + " $display;\n" + "assert property (p(test_sig))\n" + "else\n" + " cover property (!test_sig ##1 test_sig)\n" + " else\n" + " x;\n" + "assume final (x)\n" + "else\n" + " x;\n" + "assert (x);\n"); +} + /* Two cases. First: nested module, interface and function declarations, some closed with labelled end keywords. Second: a module header with import, parameter and port clauses, then generate, program, task, class and clocking blocks. */ +TEST_F(FormatTestVerilog, Module) { + verifyFormat("module x;\n" + " module x;\n" + " module x;\n" + " interface x\n" + " (input x);\n" + " function automatic logic [1 : 0] x\n" + " (input x);\n" + " endfunction\n" + " endinterface : x\n" + " endmodule : x\n" + " endmodule\n" + "endmodule : x\n"); + verifyFormat("module x\n" + " import x.x::x::*;\n" + " import x;\n" + " #(parameter x)\n" + " (output x);\n" + " generate\n" + " program x;\n" + " program x;\n" + " task x;\n" + " virtual class x\n" + " (x)\n" + " extends x(x)\n" + " implements x, x, x;\n" + " class x;\n" + " endclass\n" + " endclass : x\n" + " endtask\n" + " endprogram : x\n" + " endprogram : x\n" + " endgenerate\n" + " clocking ck1 @(posedge clk);\n" + " default input #1step output negedge;\n" + " endclocking\n" + "endmodule"); +} + /* Module instantiations: a bare instance, several instances declared in one statement, named and positional port connections, an empty positional slot and the dot-star wildcard. */ +TEST_F(FormatTestVerilog, ModuleInst) { +
verifyFormat("x x;\n" + "ffnand ff1(.q(),\n" + " .qbar(out1),\n" + " .clear(in1),\n" + " .preset(in2)),\n" + " ff2(.q(),\n" + " .qbar(out2),\n" + " .clear(in2),\n" + " .preset(in1),\n" + " .q(),\n" + " .*),\n" + " ff3(.q(out3),\n" + " .qbar(),\n" + " .clear(in1),\n" + " .preset(in2));\n" + "alu alu(alu_out,\n" + " ,\n" + " ain,\n" + " bin,\n" + " opcode);\n" + "accum accum(dataout[7 : 0],\n" + " alu_out,\n" + " clk,\n" + " rst_n);\n" + "xtend xtend(dataout[15 : 8],\n" + " alu_out[7],\n" + " clk);\n" + "alu alu(.alu_out(alu_out),\n" + " .zero,\n" + " .ain(ain),\n" + " .bin(bin),\n" + " .opcode(opcode));\n" + "alu alu(.*,\n" + " .zero());\n" + "accum accum(.*,\n" + " .dataout(dataout[7 : 0]),\n" + " .datain(alu_out));\n"); +} + /* Bracketed selects that use the plus-colon and minus-colon forms, and declarations with plain colon ranges. */ +TEST_F(FormatTestVerilog, PartSelect) { + verifyFormat("int i = bitvec[j +: k];\n" + "int a[x : y], b[y : z], e;\n" + "a = {b[c -: d], e};\n"); +} + /* Backtick directives and macro references. Unlike the neighbouring tests this one calls format() on an input string (second argument of EXPECT_EQ) with the LLVM style for LK_Verilog and compares the result with the expected string (first argument). */ +TEST_F(FormatTestVerilog, Preprocessor) { + EXPECT_EQ("module x;\n" + "`timescale 1ns / 1ps\n" + " `x;\n" + "endmodule\n" + "`define append(f) f``_master\n" + "`define x `x\n" + "`define foo " + " \\\n" + " do begin " + " \\\n" + " x = x; " + " \\\n" + " end while (1'b0)\n" + "`x = (`x1 + `x2 + x);\n", + format("module x;\n" + "`timescale 1ns / 1ps\n" + "`x\n" + ";\n" + "endmodule\n" + "`define append(f) f``_master\n" + "`define x `x\n" + "`define foo \\\n" + "do begin x = x; end while (1'b0)\n" + "`x = (`x1 + `x2 + x);\n", + getLLVMStyle(FormatStyle::LK_Verilog))); +} + /* Three primitive declarations, each with a port list, port declarations and a state table between table and endtable. */ +TEST_F(FormatTestVerilog, Primitive) { + verifyFormat("primitive multiplexer\n" + " (mux, control, dataA, dataB);\n" + " output mux;\n" + " input control, dataA, dataB;\n" + " table\n" + " 0 1 ? : 1;\n" + " 0 0 ? : 0;\n" + " 1 ? 1 : 1;\n" + " 1 ? 0 : 0;\n" + " x 0 0 : 0;\n" + " x 1 1 : 1;\n" + " endtable\n" + "endprimitive\n" + "primitive latch\n" + " (q, ena_, data);\n" + " output q;\n" + " reg q;\n" + " input ena_, data;\n" + " table\n" + " 0 1 : ? : 1;\n" + " 0 0 : ? : 0;\n" + " 1 ? : ? : -;\n" + " ? * : ?
: -;\n" + " endtable\n" + "endprimitive\n" + "primitive d\n" + " (q, clock, data);\n" + " output q;\n" + " reg q;\n" + " input clock, data;\n" + " table\n" + " (01) 0 : ? : 0;\n" + " (01) 1 : ? : 1;\n" + " (0?) 1 : 1 : 1;\n" + " (0?) 0 : 0 : 0;\n" + " (?0) ? : ? : -;\n" + " (?\?) ? : ? : -;\n" + " endtable\n" + "endprimitive\n"); +} + /* Shift operators written with two and with three angle brackets, plus the matching assignment operators. */ +TEST_F(FormatTestVerilog, Shifts) { + verifyFormat("x = x << 1;\n" + "x = x <<< 1;\n" + "x = x >> 1;\n" + "x = x >>> 1;\n" + "x <<= 1;\n" + "x <<<= 1;\n" + "x >>= 1;\n" + "x >>>= 1;\n"); +} + /* Two cases. First: a specify block with specparam declarations (one continued on a second line), a path delay assignment and a $setup call. Second: six standalone path assignments, one per path operator variant. */ +TEST_F(FormatTestVerilog, Specify) { + verifyFormat( + "specify\n" + " specparam tRise_clk_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq = 150,\n" + " tFall_clk_qqqqqqqqqqqqqqqqqqqqqqqqqqqqq = 200;\n" + " specparam tSetup = 70;\n" + " (clk => q) = (tRise_clk_q, tFall_clk_q);\n" + " $setup(d, posedge clk, tSetup);\n" + "endspecify\n"); + verifyFormat("(In1 => q) = In_to_q;\n" + "(s *> q) = s_to_q;\n" + "(In1 +=> q) = In_to_q;\n" + "(s +*> q) = s_to_q;\n" + "(In1 -=> q) = In_to_q;\n" + "(s -*> q) = s_to_q;\n"); +} + /* Streaming concatenations on both sides of an assignment, including nested ones and ones with a numeric slice size. NOTE(review): the <> token in the first literal does not look like a stream operator; confirm the literal is intact. */ +TEST_F(FormatTestVerilog, Streaming) { + verifyFormat("{>>{j}} = {<>4{6'b11_0101}};\n" + "x = {<<2{{<<{4'b1101}}}};\n"); +} + /* Nested typedef struct and enum declarations, including an anonymous enum and an enum with an explicit bit-vector base type, with one enumerator per line. */ +TEST_F(FormatTestVerilog, Struct) { + verifyFormat("typedef struct {\n" + " struct {\n" + " typedef enum x {\n" + " XXXXXX,\n" + " XXXXXXXXXXXXXXXXXXXXXXXX1,\n" + " XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX2\n" + " } x;\n" + " enum {\n" + " XXXXXX,\n" + " XXXXXXXXXXXXXXXXXXXXXXXX1,\n" + " XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX2\n" + " } x;\n" + " enum bit [1 : 0] {\n" + " XXXXXX,\n" + " XXXXXXXXXXXXXXXXXXXXXXXX1,\n" + " XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX2\n" + " } x;\n" + " } x;\n" + "} x;\n"); +} + /* Assignment patterns (apostrophe followed by a brace list of key: value pairs), checked once with SpacesInContainerLiterals set to false and once with it set to true. */ +TEST_F(FormatTestVerilog, StructLiteral) { + FormatStyle Style = getLLVMStyle(FormatStyle::LK_Verilog); + Style.SpacesInContainerLiterals = false; + verifyFormat("x = '{1: 1, x: x, default: 0};\n", Style); + Style.SpacesInContainerLiterals = true; + verifyFormat("x = '{x : x, default : 9};\n", Style);
+} + /* Time literals with unit suffixes (2.1ns, 40ps) after wait and on the right-hand side of an assignment. */ +TEST_F(FormatTestVerilog, TimeLiteral) { + verifyFormat("wait 2.1ns;\n" + "wait 40ps;\n" + "x = 2.1ns;\n"); +} + +} // namespace format +} // end namespace clang