diff --git a/clang/docs/ClangFormat.rst b/clang/docs/ClangFormat.rst --- a/clang/docs/ClangFormat.rst +++ b/clang/docs/ClangFormat.rst @@ -43,6 +43,17 @@ --assume-filename= - Override filename used to determine the language. When reading from stdin, clang-format assumes this filename to determine the language. + Unrecognized filenames are treated as C++. + supported: + CSharp: .cs + Java: .java + JavaScript: .mjs .js .ts + Json: .json + Objective-C: .m .mm + Proto: .proto .protodevel + TableGen: .td + TextProto: .textpb .pb.txt .textproto .asciipb + Verilog: .sv .svh .v .vh --cursor= - The position of the cursor when invoking clang-format from an editor integration --dry-run - If set, do not actually make the formatting changes diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2589,12 +2589,17 @@ LK_TableGen, /// Should be used for Protocol Buffer messages in text format /// (https://developers.google.com/protocol-buffers/). - LK_TextProto + LK_TextProto, + /// Should be used for Verilog and SystemVerilog. + /// https://standards.ieee.org/ieee/1800/6700/ + /// https://sci-hub.st/10.1109/IEEESTD.2018.8299595 + LK_Verilog }; bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; } bool isCSharp() const { return Language == LK_CSharp; } bool isJson() const { return Language == LK_Json; } bool isJavaScript() const { return Language == LK_JavaScript; } + bool isVerilog() const { return Language == LK_Verilog; } /// Language, this format style is targeted at. 
/// \version 3.5 @@ -4285,6 +4290,8 @@ return "TableGen"; case FormatStyle::LK_TextProto: return "TextProto"; + case FormatStyle::LK_Verilog: + return "Verilog"; default: return "Unknown"; } diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -3423,6 +3423,11 @@ return FormatStyle::LK_CSharp; if (FileName.endswith_insensitive(".json")) return FormatStyle::LK_Json; + if (FileName.endswith_insensitive(".sv") || + FileName.endswith_insensitive(".svh") || + FileName.endswith_insensitive(".v") || + FileName.endswith_insensitive(".vh")) + return FormatStyle::LK_Verilog; return FormatStyle::LK_Cpp; } diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -369,6 +369,10 @@ } bool isTypeFinalized() const { return TypeIsFinalized; } + /// Used to treat a token as if it were something else. For example, in + /// Verilog we want to treat the backtick like a hash. + tok::TokenKind AliasToken = tok::unknown; + /// The number of newlines immediately before the \c Token. /// /// This can be used to determine what the user wrote in the original code @@ -498,7 +502,10 @@ // in a configured macro expansion. llvm::Optional MacroCtx; - bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } + bool is(tok::TokenKind Kind) const { + // We don't use `Tok.is` here because it doesn't consider the alias. + return getKind() == Kind; + } bool is(TokenType TT) const { return getType() == TT; } bool is(const IdentifierInfo *II) const { return II && II == Tok.getIdentifierInfo(); @@ -519,6 +526,10 @@ } template bool isNot(T Kind) const { return !is(Kind); } + tok::TokenKind getKind() const { + return AliasToken == tok::unknown ? 
Tok.getKind() : AliasToken; + } + bool isIf(bool AllowConstexprMacro = true) const { return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) || (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro); @@ -574,7 +585,7 @@ return endsSequenceInternal(K1, Tokens...); } - bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } + bool isStringLiteral() const { return tok::isStringLiteral(getKind()); } bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { return Tok.isObjCAtKeyword(Kind); @@ -634,7 +645,7 @@ } bool isUnaryOperator() const { - switch (Tok.getKind()) { + switch (getKind()) { case tok::plus: case tok::plusplus: case tok::minus: @@ -662,7 +673,7 @@ /// Returns \c true if this is a keyword that can be used /// like a function call (e.g. sizeof, typeid, ...). bool isFunctionLikeKeyword() const { - switch (Tok.getKind()) { + switch (getKind()) { case tok::kw_throw: case tok::kw_typeid: case tok::kw_return: @@ -713,7 +724,7 @@ } prec::Level getPrecedence() const { - return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, + return getBinOpPrecedence(getKind(), /*GreaterThanIsOperator=*/true, /*CPlusPlus11=*/true); } @@ -1003,6 +1014,118 @@ kw_when = &IdentTable.get("when"); kw_where = &IdentTable.get("where"); + kw_always = &IdentTable.get("always"); + kw_always_comb = &IdentTable.get("always_comb"); + kw_always_ff = &IdentTable.get("always_ff"); + kw_always_latch = &IdentTable.get("always_latch"); + kw_assign = &IdentTable.get("assign"); + kw_assume = &IdentTable.get("assume"); + kw_automatic = &IdentTable.get("automatic"); + kw_before = &IdentTable.get("before"); + kw_begin = &IdentTable.get("begin"); + kw_bins = &IdentTable.get("bins"); + kw_binsof = &IdentTable.get("binsof"); + kw_casex = &IdentTable.get("casex"); + kw_casez = &IdentTable.get("casez"); + kw_celldefine = &IdentTable.get("celldefine"); + kw_checker = &IdentTable.get("checker"); + kw_clocking = &IdentTable.get("clocking"); + 
kw_constraint = &IdentTable.get("constraint"); + kw_cover = &IdentTable.get("cover"); + kw_covergroup = &IdentTable.get("covergroup"); + kw_coverpoint = &IdentTable.get("coverpoint"); + kw_disable = &IdentTable.get("disable"); + kw_dist = &IdentTable.get("dist"); + kw_end = &IdentTable.get("end"); + kw_endcase = &IdentTable.get("endcase"); + kw_endchecker = &IdentTable.get("endchecker"); + kw_endclass = &IdentTable.get("endclass"); + kw_endclocking = &IdentTable.get("endclocking"); + kw_endfunction = &IdentTable.get("endfunction"); + kw_endgenerate = &IdentTable.get("endgenerate"); + kw_endgroup = &IdentTable.get("endgroup"); + kw_endinterface = &IdentTable.get("endinterface"); + kw_endmodule = &IdentTable.get("endmodule"); + kw_endpackage = &IdentTable.get("endpackage"); + kw_endprimitive = &IdentTable.get("endprimitive"); + kw_endprogram = &IdentTable.get("endprogram"); + kw_endproperty = &IdentTable.get("endproperty"); + kw_endsequence = &IdentTable.get("endsequence"); + kw_endspecify = &IdentTable.get("endspecify"); + kw_endtable = &IdentTable.get("endtable"); + kw_endtask = &IdentTable.get("endtask"); + kw_forever = &IdentTable.get("forever"); + kw_fork = &IdentTable.get("fork"); + kw_generate = &IdentTable.get("generate"); + kw_highz0 = &IdentTable.get("highz0"); + kw_highz1 = &IdentTable.get("highz1"); + kw_iff = &IdentTable.get("iff"); + kw_ifnone = &IdentTable.get("ifnone"); + kw_ignore_bins = &IdentTable.get("ignore_bins"); + kw_illegal_bins = &IdentTable.get("illegal_bins"); + kw_initial = &IdentTable.get("initial"); + kw_inout = &IdentTable.get("inout"); + kw_input = &IdentTable.get("input"); + kw_inside = &IdentTable.get("inside"); + kw_interconnect = &IdentTable.get("interconnect"); + kw_intersect = &IdentTable.get("intersect"); + kw_join = &IdentTable.get("join"); + kw_join_any = &IdentTable.get("join_any"); + kw_join_none = &IdentTable.get("join_none"); + kw_large = &IdentTable.get("large"); + kw_local = &IdentTable.get("local"); + kw_localparam = 
&IdentTable.get("localparam"); + kw_macromodule = &IdentTable.get("macromodule"); + kw_matches = &IdentTable.get("matches"); + kw_medium = &IdentTable.get("medium"); + kw_output = &IdentTable.get("output"); + kw_packed = &IdentTable.get("packed"); + kw_parameter = &IdentTable.get("parameter"); + kw_primitive = &IdentTable.get("primitive"); + kw_priority = &IdentTable.get("priority"); + kw_program = &IdentTable.get("program"); + kw_property = &IdentTable.get("property"); + kw_pull0 = &IdentTable.get("pull0"); + kw_pull1 = &IdentTable.get("pull1"); + kw_pure = &IdentTable.get("pure"); + kw_rand = &IdentTable.get("rand"); + kw_randc = &IdentTable.get("randc"); + kw_randcase = &IdentTable.get("randcase"); + kw_randsequence = &IdentTable.get("randsequence"); + kw_repeat = &IdentTable.get("repeat"); + kw_sample = &IdentTable.get("sample"); + kw_scalared = &IdentTable.get("scalared"); + kw_sequence = &IdentTable.get("sequence"); + kw_small = &IdentTable.get("small"); + kw_soft = &IdentTable.get("soft"); + kw_solve = &IdentTable.get("solve"); + kw_specify = &IdentTable.get("specify"); + kw_specparam = &IdentTable.get("specparam"); + kw_strong0 = &IdentTable.get("strong0"); + kw_strong1 = &IdentTable.get("strong1"); + kw_supply0 = &IdentTable.get("supply0"); + kw_supply1 = &IdentTable.get("supply1"); + kw_table = &IdentTable.get("table"); + kw_tagged = &IdentTable.get("tagged"); + kw_task = &IdentTable.get("task"); + kw_tri = &IdentTable.get("tri"); + kw_tri0 = &IdentTable.get("tri0"); + kw_tri1 = &IdentTable.get("tri1"); + kw_triand = &IdentTable.get("triand"); + kw_trior = &IdentTable.get("trior"); + kw_trireg = &IdentTable.get("trireg"); + kw_unique = &IdentTable.get("unique"); + kw_unique0 = &IdentTable.get("unique0"); + kw_uwire = &IdentTable.get("uwire"); + kw_vectored = &IdentTable.get("vectored"); + kw_wand = &IdentTable.get("wand"); + kw_weak0 = &IdentTable.get("weak0"); + kw_weak1 = &IdentTable.get("weak1"); + kw_wildcard = &IdentTable.get("wildcard"); + kw_wire = 
&IdentTable.get("wire"); + kw_with = &IdentTable.get("with"); + kw_wor = &IdentTable.get("wor"); + // Keep this at the end of the constructor to make sure everything here // is // already initialized. @@ -1026,6 +1149,42 @@ kw_set, kw_type, kw_typeof, kw_var, kw_yield, // Keywords from the Java section. kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); + + // Some keywords are not included here because they don't need special + // treatment like `showcancelled` or they should be treated as identifiers + // like `int` and `logic`. + VerilogExtraKeywords = std::unordered_set( + {kw_always, kw_always_comb, kw_always_ff, kw_always_latch, + kw_assert, kw_assign, kw_assume, kw_automatic, + kw_before, kw_begin, kw_bins, kw_binsof, + kw_casex, kw_casez, kw_celldefine, kw_checker, + kw_clocking, kw_constraint, kw_cover, kw_covergroup, + kw_coverpoint, kw_disable, kw_dist, kw_end, + kw_endcase, kw_endchecker, kw_endclass, kw_endclocking, + kw_endfunction, kw_endgenerate, kw_endgroup, kw_endinterface, + kw_endmodule, kw_endpackage, kw_endprimitive, kw_endprogram, + kw_endproperty, kw_endsequence, kw_endspecify, kw_endtable, + kw_endtask, kw_extends, kw_final, kw_foreach, + kw_forever, kw_fork, kw_function, kw_generate, + kw_highz0, kw_highz1, kw_iff, kw_ifnone, + kw_ignore_bins, kw_illegal_bins, kw_implements, kw_import, + kw_initial, kw_inout, kw_input, kw_inside, + kw_interconnect, kw_interface, kw_intersect, kw_join, + kw_join_any, kw_join_none, kw_large, kw_let, + kw_local, kw_localparam, kw_macromodule, kw_matches, + kw_medium, kw_output, kw_package, kw_packed, + kw_parameter, kw_primitive, kw_priority, kw_program, + kw_property, kw_pull0, kw_pull1, kw_pure, + kw_rand, kw_randc, kw_randcase, kw_randsequence, + kw_ref, kw_repeat, kw_sample, kw_scalared, + kw_sequence, kw_small, kw_soft, kw_solve, + kw_specify, kw_specparam, kw_strong0, kw_strong1, + kw_supply0, kw_supply1, kw_table, kw_tagged, + kw_task, kw_tri, kw_tri0, kw_tri1, + kw_triand, 
kw_trior, kw_trireg, kw_unique, + kw_unique0, kw_uwire, kw_var, kw_vectored, + kw_wand, kw_weak0, kw_weak1, kw_wildcard, + kw_wire, kw_with, kw_wor}); } // Context sensitive keywords. @@ -1130,6 +1289,119 @@ IdentifierInfo *kw_when; IdentifierInfo *kw_where; + // Verilog keywords + IdentifierInfo *kw_always; + IdentifierInfo *kw_always_comb; + IdentifierInfo *kw_always_ff; + IdentifierInfo *kw_always_latch; + IdentifierInfo *kw_assign; + IdentifierInfo *kw_assume; + IdentifierInfo *kw_automatic; + IdentifierInfo *kw_before; + IdentifierInfo *kw_begin; + IdentifierInfo *kw_bins; + IdentifierInfo *kw_binsof; + IdentifierInfo *kw_casex; + IdentifierInfo *kw_casez; + IdentifierInfo *kw_celldefine; + IdentifierInfo *kw_checker; + IdentifierInfo *kw_clocking; + IdentifierInfo *kw_constraint; + IdentifierInfo *kw_cover; + IdentifierInfo *kw_covergroup; + IdentifierInfo *kw_coverpoint; + IdentifierInfo *kw_disable; + IdentifierInfo *kw_dist; + IdentifierInfo *kw_end; + IdentifierInfo *kw_endcase; + IdentifierInfo *kw_endchecker; + IdentifierInfo *kw_endclass; + IdentifierInfo *kw_endclocking; + IdentifierInfo *kw_endfunction; + IdentifierInfo *kw_endgenerate; + IdentifierInfo *kw_endgroup; + IdentifierInfo *kw_endinterface; + IdentifierInfo *kw_endmodule; + IdentifierInfo *kw_endpackage; + IdentifierInfo *kw_endprimitive; + IdentifierInfo *kw_endprogram; + IdentifierInfo *kw_endproperty; + IdentifierInfo *kw_endsequence; + IdentifierInfo *kw_endspecify; + IdentifierInfo *kw_endtable; + IdentifierInfo *kw_endtask; + IdentifierInfo *kw_forever; + IdentifierInfo *kw_fork; + IdentifierInfo *kw_generate; + IdentifierInfo *kw_highz0; + IdentifierInfo *kw_highz1; + IdentifierInfo *kw_iff; + IdentifierInfo *kw_ifnone; + IdentifierInfo *kw_ignore_bins; + IdentifierInfo *kw_illegal_bins; + IdentifierInfo *kw_initial; + IdentifierInfo *kw_inout; + IdentifierInfo *kw_input; + IdentifierInfo *kw_inside; + IdentifierInfo *kw_interconnect; + IdentifierInfo *kw_intersect; + IdentifierInfo 
*kw_join; + IdentifierInfo *kw_join_any; + IdentifierInfo *kw_join_none; + IdentifierInfo *kw_large; + IdentifierInfo *kw_local; + IdentifierInfo *kw_localparam; + IdentifierInfo *kw_macromodule; + IdentifierInfo *kw_matches; + IdentifierInfo *kw_medium; + IdentifierInfo *kw_output; + IdentifierInfo *kw_packed; + IdentifierInfo *kw_parameter; + IdentifierInfo *kw_primitive; + IdentifierInfo *kw_priority; + IdentifierInfo *kw_program; + IdentifierInfo *kw_property; + IdentifierInfo *kw_pull0; + IdentifierInfo *kw_pull1; + IdentifierInfo *kw_pure; + IdentifierInfo *kw_rand; + IdentifierInfo *kw_randc; + IdentifierInfo *kw_randcase; + IdentifierInfo *kw_randsequence; + IdentifierInfo *kw_repeat; + IdentifierInfo *kw_sample; + IdentifierInfo *kw_scalared; + IdentifierInfo *kw_sequence; + IdentifierInfo *kw_small; + IdentifierInfo *kw_soft; + IdentifierInfo *kw_solve; + IdentifierInfo *kw_specify; + IdentifierInfo *kw_specparam; + IdentifierInfo *kw_strong0; + IdentifierInfo *kw_strong1; + IdentifierInfo *kw_supply0; + IdentifierInfo *kw_supply1; + IdentifierInfo *kw_table; + IdentifierInfo *kw_tagged; + IdentifierInfo *kw_task; + IdentifierInfo *kw_tri; + IdentifierInfo *kw_tri0; + IdentifierInfo *kw_tri1; + IdentifierInfo *kw_triand; + IdentifierInfo *kw_trior; + IdentifierInfo *kw_trireg; + IdentifierInfo *kw_unique; + IdentifierInfo *kw_unique0; + IdentifierInfo *kw_uwire; + IdentifierInfo *kw_vectored; + IdentifierInfo *kw_wand; + IdentifierInfo *kw_weak0; + IdentifierInfo *kw_weak1; + IdentifierInfo *kw_wildcard; + IdentifierInfo *kw_wire; + IdentifierInfo *kw_with; + IdentifierInfo *kw_wor; + /// Returns \c true if \p Tok is a true JavaScript identifier, returns /// \c false if it is a keyword or a pseudo keyword. 
/// If \c AcceptIdentifierName is true, returns true not only for keywords, @@ -1139,7 +1411,7 @@ bool AcceptIdentifierName = true) const { // Based on the list of JavaScript & TypeScript keywords here: // https://github.com/microsoft/TypeScript/blob/main/src/compiler/scanner.ts#L74 - switch (Tok.Tok.getKind()) { + switch (Tok.getKind()) { case tok::kw_break: case tok::kw_case: case tok::kw_catch: @@ -1187,7 +1459,7 @@ break; } - switch (Tok.Tok.getKind()) { + switch (Tok.getKind()) { // Handle C++ keywords not included above: these are all JS identifiers. #define KEYWORD(X, Y) case tok::kw_##X: #include "clang/Basic/TokenKinds.def" @@ -1203,7 +1475,7 @@ /// Returns \c true if \p Tok is a C# keyword, returns /// \c false if it is a anything else. bool isCSharpKeyword(const FormatToken &Tok) const { - switch (Tok.Tok.getKind()) { + switch (Tok.getKind()) { case tok::kw_bool: case tok::kw_break: case tok::kw_case: @@ -1256,12 +1528,70 @@ } } + bool isVerilogIdentifier(const FormatToken &Tok) const { + switch (Tok.Tok.getKind()) { + case tok::kw_case: + case tok::kw_class: + case tok::kw_const: + case tok::kw_continue: + case tok::kw_default: + case tok::kw_do: + case tok::kw_extern: + case tok::kw_else: + case tok::kw_enum: + case tok::kw_for: + case tok::kw_if: + case tok::kw_restrict: + case tok::kw_signed: + case tok::kw_static: + case tok::kw_struct: + case tok::kw_typedef: + case tok::kw_union: + case tok::kw_unsigned: + case tok::kw_virtual: + case tok::kw_while: + return false; + case tok::identifier: + return VerilogExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == + VerilogExtraKeywords.end(); + default: + // getIdentifierInfo returns non-null for both identifiers and keywords. + return Tok.Tok.getIdentifierInfo() != nullptr; + } + } + + /// Returns whether \p Tok is a Verilog keyword that opens a block. + bool isVerilogBegin(const FormatToken &Tok) const { + // `table` is not included since it needs to be treated specially. 
+ return !Tok.endsSequence(kw_fork, kw_disable) && + Tok.isOneOf(kw_begin, kw_fork, kw_generate, kw_specify); + } + + /// Returns whether \p Tok is a Verilog keyword that closes a block. + bool isVerilogEnd(const FormatToken &Tok) const { + return !Tok.endsSequence(kw_join, kw_rand) && + Tok.isOneOf(kw_end, kw_endcase, kw_endclass, kw_endclocking, + kw_endchecker, kw_endfunction, kw_endgenerate, + kw_endgroup, kw_endinterface, kw_endmodule, + kw_endpackage, kw_endprimitive, kw_endprogram, + kw_endproperty, kw_endsequence, kw_endspecify, + kw_endtable, kw_endtask, kw_join, kw_join_any, kw_join_none); + } + + /// Whether the token begins a block. + bool isBlockBegin(const FormatToken &Tok, const FormatStyle &Style) const { + return Style.isVerilog() ? isVerilogBegin(Tok) : Tok.is(tok::l_brace); + } + private: /// The JavaScript keywords beyond the C++ keyword set. std::unordered_set JsExtraKeywords; /// The C# keywords beyond the C++ keyword set std::unordered_set CSharpExtraKeywords; + + /// The Verilog keywords beyond the C++ keyword set. + std::unordered_set VerilogExtraKeywords; }; } // namespace format diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -37,7 +37,7 @@ // FIXME: This is copy&pasted from Sema. Put it in a common place and remove // duplication. 
bool FormatToken::isSimpleTypeSpecifier() const { - switch (Tok.getKind()) { + switch (getKind()) { case tok::kw_short: case tok::kw_long: case tok::kw___int64: diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp --- a/clang/lib/Format/QualifierAlignmentFixer.cpp +++ b/clang/lib/Format/QualifierAlignmentFixer.cpp @@ -469,7 +469,7 @@ bool LeftRightQualifierAlignmentFixer::isQualifierOrType( const FormatToken *Tok, const std::vector &specifiedTypes) { return Tok && (Tok->isSimpleTypeSpecifier() || Tok->is(tok::kw_auto) || - llvm::is_contained(specifiedTypes, Tok->Tok.getKind())); + llvm::is_contained(specifiedTypes, Tok->getKind())); } // If a token is an identifier and it's upper case, it could diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -182,7 +182,7 @@ bool parseUntouchableParens() { while (CurrentToken) { CurrentToken->Finalized = true; - switch (CurrentToken->Tok.getKind()) { + switch (CurrentToken->getKind()) { case tok::l_paren: next(); if (!parseUntouchableParens()) @@ -573,8 +573,7 @@ Parent->isUnaryOperator() || // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. 
Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) || - (getBinOpPrecedence(Parent->Tok.getKind(), true, true) > - prec::Unknown)); + (getBinOpPrecedence(Parent->getKind(), true, true) > prec::Unknown)); bool ColonFound = false; unsigned BindingIncrease = 1; @@ -876,7 +875,7 @@ bool consumeToken() { FormatToken *Tok = CurrentToken; next(); - switch (Tok->Tok.getKind()) { + switch (Tok->getKind()) { case tok::plus: case tok::minus: if (!Tok->Previous && Line.MustBeDeclaration) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -425,7 +425,7 @@ } } if (PreviousLine && TheLine->First->is(tok::l_brace)) { - switch (PreviousLine->First->Tok.getKind()) { + switch (PreviousLine->First->getKind()) { case tok::at: // Don't merge block with left brace wrapped after ObjC special blocks. if (PreviousLine->First->Next) { diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -413,7 +413,7 @@ void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { do { - switch (FormatTok->Tok.getKind()) { + switch (FormatTok->getKind()) { case tok::l_brace: return; default: @@ -432,7 +432,7 @@ void UnwrappedLineParser::parseCSharpAttribute() { int UnpairedSquareBrackets = 1; do { - switch (FormatTok->Tok.getKind()) { + switch (FormatTok->getKind()) { case tok::r_square: nextToken(); --UnpairedSquareBrackets; @@ -484,7 +484,7 @@ nextToken(); continue; } - tok::TokenKind kind = FormatTok->Tok.getKind(); + tok::TokenKind kind = FormatTok->getKind(); if (FormatTok->getType() == TT_MacroBlockBegin) kind = tok::l_brace; else if (FormatTok->getType() == TT_MacroBlockEnd) @@ -604,7 +604,7 @@ NextTok = Tokens->getNextToken(); } while (NextTok->is(tok::comment)); - switch (Tok->Tok.getKind()) { + switch 
(Tok->getKind()) { case tok::l_brace: if (Style.isJavaScript() && PrevTok) { if (PrevTok->isOneOf(tok::colon, tok::less)) @@ -756,7 +756,19 @@ bool MunchSemi, bool UnindentWhitesmithsBraces, bool CanContainBracedList, TokenType NextLBracesType) { - assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && + auto HandleVerilogBlockLabel = [this]() { + // ":" name + if (Style.isVerilog() && + FormatTok->is(tok::colon)) { + nextToken(); + if (Keywords.isVerilogIdentifier(*FormatTok)) + nextToken(); + } + }; + + assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || + (Style.isVerilog() && + Keywords.isVerilogBegin(*FormatTok))) && "'{' or macro block token expected"); FormatToken *Tok = FormatTok; const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); @@ -771,6 +783,7 @@ unsigned InitialLevel = Line->Level; nextToken(/*LevelDifference=*/AddLevels); + HandleVerilogBlockLabel(); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); @@ -822,6 +835,7 @@ // Munch the closing brace. 
nextToken(/*LevelDifference=*/-AddLevels); + HandleVerilogBlockLabel(); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); @@ -893,7 +907,7 @@ static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken) { - tok::TokenKind Kind = InitialToken.Tok.getKind(); + tok::TokenKind Kind = InitialToken.getKind(); if (InitialToken.is(TT_NamespaceMacro)) Kind = tok::kw_namespace; @@ -1104,8 +1118,7 @@ FormatTok->Tok.setKind(tok::identifier); FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); nextToken(); - if (FormatTok->Tok.getKind() == tok::l_paren && - !FormatTok->hasWhitespaceBefore()) + if (FormatTok->getKind() == tok::l_paren && !FormatTok->hasWhitespaceBefore()) parseParens(); if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) Line->Level += PPBranchLevel + 1; @@ -1332,7 +1345,7 @@ addUnwrappedLine(); return; } - switch (FormatTok->Tok.getKind()) { + switch (FormatTok->getKind()) { case tok::kw_asm: nextToken(); if (FormatTok->is(tok::l_brace)) { @@ -1508,7 +1521,7 @@ } do { const FormatToken *Previous = FormatTok->Previous; - switch (FormatTok->Tok.getKind()) { + switch (FormatTok->getKind()) { case tok::at: nextToken(); if (FormatTok->is(tok::l_brace)) { @@ -1886,7 +1899,7 @@ addUnwrappedLine(); nextToken(); do { - switch (FormatTok->Tok.getKind()) { + switch (FormatTok->getKind()) { case tok::r_brace: nextToken(); if (FormatTok->is(tok::equal)) { @@ -1948,7 +1961,7 @@ nextToken(); continue; } - switch (FormatTok->Tok.getKind()) { + switch (FormatTok->getKind()) { case tok::l_brace: break; case tok::l_paren: @@ -2143,13 +2156,13 @@ parseChildBlock(); } } - if (FormatTok->Tok.getKind() == ClosingBraceKind) { + if (FormatTok->getKind() == ClosingBraceKind) { if (IsEnum && !Style.AllowShortEnumsOnASingleLine) addUnwrappedLine(); nextToken(); return !HasError; } - switch (FormatTok->Tok.getKind()) { + switch (FormatTok->getKind()) { case tok::l_square: if (Style.isCSharp()) parseSquare(); @@ 
-2216,7 +2229,7 @@ assert(FormatTok->is(tok::l_paren) && "'(' expected."); nextToken(); do { - switch (FormatTok->Tok.getKind()) { + switch (FormatTok->getKind()) { case tok::l_paren: parseParens(); if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) @@ -2286,7 +2299,7 @@ return; } do { - switch (FormatTok->Tok.getKind()) { + switch (FormatTok->getKind()) { case tok::l_paren: parseParens(); break; @@ -2435,7 +2448,7 @@ FormatToken *IfLeftBrace = nullptr; IfStmtKind IfBlockKind = IfStmtKind::NotIf; - if (FormatTok->is(tok::l_brace)) { + if (Keywords.isBlockBegin(*FormatTok, Style)) { IfLeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); IfBlockKind = parseBlock(); @@ -2465,7 +2478,7 @@ } nextToken(); handleAttributes(); - if (FormatTok->is(tok::l_brace)) { + if (Keywords.isBlockBegin(*FormatTok, Style)) { ElseLeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); if (parseBlock() == IfStmtKind::IfOnly) @@ -2717,7 +2730,7 @@ bool WrapRightBrace) { keepAncestorBraces(); - if (FormatTok->is(tok::l_brace)) { + if (Keywords.isBlockBegin(*FormatTok, Style)) { FormatToken *LeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(); @@ -2916,7 +2929,7 @@ nextToken(); addUnwrappedLine(); } else if (!FormatTok->is(tok::coloncolon) && - !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { + !isCOperatorFollowingVar(FormatTok->getKind())) { // Not a variable name nor namespace name. addUnwrappedLine(); } else if (AccessSpecifierCandidate) { @@ -2956,7 +2969,7 @@ // that we first consume the keyword and check the next token. nextToken(); - switch (FormatTok->Tok.getKind()) { + switch (FormatTok->getKind()) { case tok::l_brace: // This can only be an expression, never a clause. 
parseRequiresExpression(RequiresToken); @@ -2987,7 +3000,7 @@ return true; } - switch (PreviousNonComment->Tok.getKind()) { + switch (PreviousNonComment->getKind()) { case tok::greater: case tok::r_paren: case tok::kw_noexcept: @@ -3034,7 +3047,7 @@ int OpenAngles = 0; for (; NextTokenOffset < 50; PeekNext()) { - switch (NextToken->Tok.getKind()) { + switch (NextToken->getKind()) { case tok::kw_volatile: case tok::kw_const: case tok::comma: @@ -3148,7 +3161,7 @@ do { bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); - switch (FormatTok->Tok.getKind()) { + switch (FormatTok->getKind()) { case tok::kw_requires: { auto RequiresToken = FormatTok; nextToken(); @@ -3252,7 +3265,7 @@ // ended before that), and basically all other cases. But it's easier to // check the other way around. assert(FormatTok->Previous); - switch (FormatTok->Previous->Tok.getKind()) { + switch (FormatTok->Previous->getKind()) { case tok::coloncolon: // Nested identifier. case tok::ampamp: // Start of a function or variable for the case tok::pipepipe: // constraint expression. @@ -3555,7 +3568,7 @@ } auto GetBraceType = [](const FormatToken &RecordTok) { - switch (RecordTok.Tok.getKind()) { + switch (RecordTok.getKind()) { case tok::kw_class: return TT_ClassLBrace; case tok::kw_struct: @@ -3984,6 +3997,16 @@ else readTokenWithJavaScriptASI(); FormatTok->Previous = Previous; + if (Style.isVerilog()) { + // Blocks in Verilog can have `begin` and `end` instead of braces. For + // keywords like `begin`, we can't treat them the same as left braces + // because some contexts require one of them. For example structs use + // braces and if blocks use keywords, and a left brace can occur in an if + // statement, but it is not a block. For keywords like `end`, we simply + // treat them the same as right braces. 
+ if (Keywords.isVerilogEnd(*FormatTok)) + FormatTok->AliasToken = tok::r_brace; + } } void UnwrappedLineParser::distributeComments( diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -79,7 +79,18 @@ "assume-filename", cl::desc("Override filename used to determine the language.\n" "When reading from stdin, clang-format assumes this\n" - "filename to determine the language."), + "filename to determine the language.\n" + "Unrecognized filenames are treated as C++.\n" + "supported:\n" + " CSharp: .cs\n" + " Java: .java\n" + " JavaScript: .mjs .js .ts\n" + " Json: .json\n" + " Objective-C: .m .mm\n" + " Proto: .proto .protodevel\n" + " TableGen: .td\n" + " TextProto: .textpb .pb.txt .textproto .asciipb\n" + " Verilog: .sv .svh .v .vh"), cl::init(""), cl::cat(ClangFormatCategory)); static cl::opt Inplace("i", diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt --- a/clang/unittests/Format/CMakeLists.txt +++ b/clang/unittests/Format/CMakeLists.txt @@ -17,6 +17,7 @@ FormatTestSelective.cpp FormatTestTableGen.cpp FormatTestTextProto.cpp + FormatTestVerilog.cpp MacroExpanderTest.cpp NamespaceEndCommentsFixerTest.cpp QualifierFixerTest.cpp diff --git a/clang/unittests/Format/FormatTestUtils.h b/clang/unittests/Format/FormatTestUtils.h --- a/clang/unittests/Format/FormatTestUtils.h +++ b/clang/unittests/Format/FormatTestUtils.h @@ -19,7 +19,10 @@ namespace format { namespace test { -inline std::string messUp(llvm::StringRef Code) { +// When HandleHash is false, preprocessor directives starting with hash will not +// be on separate lines. This is needed because Verilog uses hash for other +// purposes. 
+inline std::string messUp(llvm::StringRef Code, bool HandleHash = true) { std::string MessedUp(Code.str()); bool InComment = false; bool InPreprocessorDirective = false; @@ -29,7 +32,7 @@ if (JustReplacedNewline) MessedUp[i - 1] = '\n'; InComment = true; - } else if (MessedUp[i] == '#' && + } else if (HandleHash && MessedUp[i] == '#' && (JustReplacedNewline || i == 0 || MessedUp[i - 1] == '\n')) { if (i != 0) MessedUp[i - 1] = '\n'; diff --git a/clang/unittests/Format/FormatTestVerilog.cpp b/clang/unittests/Format/FormatTestVerilog.cpp new file mode 100644 --- /dev/null +++ b/clang/unittests/Format/FormatTestVerilog.cpp @@ -0,0 +1,118 @@ +//===- unittest/Format/FormatTestVerilog.cpp ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FormatTestUtils.h" +#include "clang/Format/Format.h" +#include "llvm/Support/Debug.h" +#include "gtest/gtest.h" + +#define DEBUG_TYPE "format-test" + +namespace clang { +namespace format { + +class FormatTestVerilog : public ::testing::Test { +protected: + static std::string format(llvm::StringRef Code, unsigned Offset, + unsigned Length, const FormatStyle &Style) { + LLVM_DEBUG(llvm::errs() << "---\n"); + LLVM_DEBUG(llvm::errs() << Code << "\n\n"); + std::vector Ranges(1, tooling::Range(Offset, Length)); + tooling::Replacements Replaces = reformat(Style, Code, Ranges); + auto Result = applyAllReplacements(Code, Replaces); + EXPECT_TRUE(static_cast(Result)); + LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n"); + return *Result; + } + + static std::string format(llvm::StringRef Code, const FormatStyle &Style) { + return format(Code, 0, Code.size(), Style); + } + + static void verifyFormat( + llvm::StringRef Code, + const FormatStyle &Style = 
getLLVMStyle(FormatStyle::LK_Verilog)) { + EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable"; + EXPECT_EQ(Code.str(), + format(test::messUp(Code, /*HandleHash=*/false), Style)); + } +}; + +TEST_F(FormatTestVerilog, If) { + verifyFormat("if (x)\n" + " x = x;"); + verifyFormat("if (x)\n" + " x = x;\n" + "x = x;"); + + // Test else + verifyFormat("if (x)\n" + " x = x;\n" + "else if (x)\n" + " x = x;\n" + "else\n" + " x = x;"); + verifyFormat("if (x) begin\n" + " x = x;\n" + "end else if (x) begin\n" + " x = x;\n" + "end else begin\n" + " x = x;\n" + "end"); + verifyFormat("if (x) begin : x\n" + " x = x;\n" + "end : x else if (x) begin : x\n" + " x = x;\n" + "end : x else begin : x\n" + " x = x;\n" + "end : x"); + + // Test block keywords. + verifyFormat("if (x) begin\n" + " x = x;\n" + "end"); + verifyFormat("if (x) begin : x\n" + " x = x;\n" + "end : x"); + verifyFormat("if (x) begin\n" + " x = x;\n" + " x = x;\n" + "end"); + verifyFormat("disable fork;\n" + "x = x;"); + verifyFormat("rand join x x;\n" + "x = x;"); + verifyFormat("if (x) fork\n" + " x = x;\n" + "join"); + verifyFormat("if (x) fork\n" + " x = x;\n" + "join_any"); + verifyFormat("if (x) fork\n" + " x = x;\n" + "join_none"); + verifyFormat("if (x) generate\n" + " x = x;\n" + "endgenerate"); + verifyFormat("if (x) generate : x\n" + " x = x;\n" + "endgenerate : x"); + + // Test that concatenation braces don't get regarded as blocks. + verifyFormat("if (x)\n" + " {x} = x;"); + verifyFormat("if (x)\n" + " x = {x};"); + verifyFormat("if (x)\n" + " x = {x};\n" + "else\n" + " {x} = {x};"); +} + +} // namespace format +} // end namespace clang