diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -984,6 +984,7 @@ kw_automatic = &IdentTable.get("automatic"); kw_before = &IdentTable.get("before"); kw_begin = &IdentTable.get("begin"); + kw_begin_keywords = &IdentTable.get("begin_keywords"); kw_bins = &IdentTable.get("bins"); kw_binsof = &IdentTable.get("binsof"); kw_casex = &IdentTable.get("casex"); @@ -995,10 +996,20 @@ kw_cover = &IdentTable.get("cover"); kw_covergroup = &IdentTable.get("covergroup"); kw_coverpoint = &IdentTable.get("coverpoint"); + kw_default_decay_time = &IdentTable.get("default_decay_time"); + kw_default_nettype = &IdentTable.get("default_nettype"); + kw_default_trireg_strength = &IdentTable.get("default_trireg_strength"); + kw_delay_mode_distributed = &IdentTable.get("delay_mode_distributed"); + kw_delay_mode_path = &IdentTable.get("delay_mode_path"); + kw_delay_mode_unit = &IdentTable.get("delay_mode_unit"); + kw_delay_mode_zero = &IdentTable.get("delay_mode_zero"); kw_disable = &IdentTable.get("disable"); kw_dist = &IdentTable.get("dist"); + kw_elsif = &IdentTable.get("elsif"); kw_end = &IdentTable.get("end"); + kw_end_keywords = &IdentTable.get("end_keywords"); kw_endcase = &IdentTable.get("endcase"); + kw_endcelldefine = &IdentTable.get("endcelldefine"); kw_endchecker = &IdentTable.get("endchecker"); kw_endclass = &IdentTable.get("endclass"); kw_endclocking = &IdentTable.get("endclocking"); @@ -1039,6 +1050,7 @@ kw_macromodule = &IdentTable.get("macromodule"); kw_matches = &IdentTable.get("matches"); kw_medium = &IdentTable.get("medium"); + kw_nounconnected_drive = &IdentTable.get("nounconnected_drive"); kw_output = &IdentTable.get("output"); kw_packed = &IdentTable.get("packed"); kw_parameter = &IdentTable.get("parameter"); @@ -1054,6 +1066,7 @@ kw_randcase = &IdentTable.get("randcase"); kw_randsequence = &IdentTable.get("randsequence"); kw_repeat = &IdentTable.get("repeat"); + 
kw_resetall = &IdentTable.get("resetall"); kw_sample = &IdentTable.get("sample"); kw_scalared = &IdentTable.get("scalared"); kw_sequence = &IdentTable.get("sequence"); @@ -1069,12 +1082,15 @@ kw_table = &IdentTable.get("table"); kw_tagged = &IdentTable.get("tagged"); kw_task = &IdentTable.get("task"); + kw_timescale = &IdentTable.get("timescale"); kw_tri = &IdentTable.get("tri"); kw_tri0 = &IdentTable.get("tri0"); kw_tri1 = &IdentTable.get("tri1"); kw_triand = &IdentTable.get("triand"); kw_trior = &IdentTable.get("trior"); kw_trireg = &IdentTable.get("trireg"); + kw_unconnected_drive = &IdentTable.get("unconnected_drive"); + kw_undefineall = &IdentTable.get("undefineall"); kw_unique = &IdentTable.get("unique"); kw_unique0 = &IdentTable.get("unique0"); kw_uwire = &IdentTable.get("uwire"); @@ -1087,6 +1103,10 @@ kw_with = &IdentTable.get("with"); kw_wor = &IdentTable.get("wor"); + // Symbols that are treated as keywords. + verilogHash = &IdentTable.get("#"); + verilogHashHash = &IdentTable.get("##"); + // Keep this at the end of the constructor to make sure everything here // is // already initialized. @@ -1145,7 +1165,8 @@ kw_triand, kw_trior, kw_trireg, kw_unique, kw_unique0, kw_uwire, kw_var, kw_vectored, kw_wand, kw_weak0, kw_weak1, kw_wildcard, - kw_wire, kw_with, kw_wor}); + kw_wire, kw_with, kw_wor, verilogHash, + verilogHashHash}); } // Context sensitive keywords. 
@@ -1260,6 +1281,7 @@ IdentifierInfo *kw_automatic; IdentifierInfo *kw_before; IdentifierInfo *kw_begin; + IdentifierInfo *kw_begin_keywords; IdentifierInfo *kw_bins; IdentifierInfo *kw_binsof; IdentifierInfo *kw_casex; @@ -1271,10 +1293,20 @@ IdentifierInfo *kw_cover; IdentifierInfo *kw_covergroup; IdentifierInfo *kw_coverpoint; + IdentifierInfo *kw_default_decay_time; + IdentifierInfo *kw_default_nettype; + IdentifierInfo *kw_default_trireg_strength; + IdentifierInfo *kw_delay_mode_distributed; + IdentifierInfo *kw_delay_mode_path; + IdentifierInfo *kw_delay_mode_unit; + IdentifierInfo *kw_delay_mode_zero; IdentifierInfo *kw_disable; IdentifierInfo *kw_dist; + IdentifierInfo *kw_elsif; IdentifierInfo *kw_end; + IdentifierInfo *kw_end_keywords; IdentifierInfo *kw_endcase; + IdentifierInfo *kw_endcelldefine; IdentifierInfo *kw_endchecker; IdentifierInfo *kw_endclass; IdentifierInfo *kw_endclocking; @@ -1315,6 +1347,7 @@ IdentifierInfo *kw_macromodule; IdentifierInfo *kw_matches; IdentifierInfo *kw_medium; + IdentifierInfo *kw_nounconnected_drive; IdentifierInfo *kw_output; IdentifierInfo *kw_packed; IdentifierInfo *kw_parameter; @@ -1330,6 +1363,7 @@ IdentifierInfo *kw_randcase; IdentifierInfo *kw_randsequence; IdentifierInfo *kw_repeat; + IdentifierInfo *kw_resetall; IdentifierInfo *kw_sample; IdentifierInfo *kw_scalared; IdentifierInfo *kw_sequence; @@ -1345,12 +1379,15 @@ IdentifierInfo *kw_table; IdentifierInfo *kw_tagged; IdentifierInfo *kw_task; - IdentifierInfo *kw_tri; + IdentifierInfo *kw_timescale; IdentifierInfo *kw_tri0; IdentifierInfo *kw_tri1; + IdentifierInfo *kw_tri; IdentifierInfo *kw_triand; IdentifierInfo *kw_trior; IdentifierInfo *kw_trireg; + IdentifierInfo *kw_unconnected_drive; + IdentifierInfo *kw_undefineall; IdentifierInfo *kw_unique; IdentifierInfo *kw_unique0; IdentifierInfo *kw_uwire; @@ -1363,6 +1400,15 @@ IdentifierInfo *kw_with; IdentifierInfo *kw_wor; + // Workaround for hashes and backticks in Verilog. 
+  IdentifierInfo *verilogHash;
+  IdentifierInfo *verilogHashHash;
+
+  /// Returns \c true if \p Tok is the Verilog `#` or `##` symbol.
+  bool isAdditionalSymbol(const FormatToken &Tok) const {
+    return Tok.isOneOf(verilogHash, verilogHashHash);
+  }
+
   /// Returns \c true if \p Tok is a true JavaScript identifier, returns
   /// \c false if it is a keyword or a pseudo keyword.
   /// If \c AcceptIdentifierName is true, returns true not only for keywords,
@@ -1521,6 +1567,38 @@
     }
   }
 
+  /// Returns whether \p Tok is a Verilog preprocessor directive. This is
+  /// needed because macro expansions start with a backtick as well and they
+  /// need to be treated differently.
+  bool isVerilogPPDirective(const FormatToken &Tok) const {
+    const auto *Info = Tok.Tok.getIdentifierInfo();
+    if (!Info)
+      return false;
+    // Directives that share a name with a C preprocessor directive are
+    // recognized through their preprocessor keyword ID.
+    switch (Info->getPPKeywordID()) {
+    case tok::pp_define:
+    case tok::pp_else:
+    case tok::pp_endif:
+    case tok::pp_ifdef:
+    case tok::pp_ifndef:
+    case tok::pp_include:
+    case tok::pp_line:
+    case tok::pp_pragma:
+    case tok::pp_undef:
+      return true;
+    default:
+      // The remaining directives are matched against the kw_* identifiers.
+      return Tok.isOneOf(kw_begin_keywords, kw_celldefine,
+                         kw_default_decay_time, kw_default_nettype,
+                         kw_default_trireg_strength, kw_delay_mode_distributed,
+                         kw_delay_mode_path, kw_delay_mode_unit,
+                         kw_delay_mode_zero, kw_elsif, kw_end_keywords,
+                         kw_endcelldefine, kw_nounconnected_drive, kw_resetall,
+                         kw_timescale, kw_unconnected_drive, kw_undefineall);
+    }
+  }
+
   /// Returns whether \p Tok is a Verilog keyword that opens a block.
   bool isVerilogBegin(const FormatToken &Tok) const {
     // `table` is not included since it needs to be treated specially.
@@ -1536,8 +1614,8 @@
                        kw_endgenerate, kw_endgroup, kw_endinterface,
                        kw_endmodule, kw_endpackage, kw_endprimitive,
                        kw_endprogram, kw_endproperty, kw_endsequence,
-                       kw_endspecify, kw_endtable, kw_endtask, kw_join_any,
-                       kw_join_none);
+                       kw_endspecify, kw_endtable, kw_endtask, kw_join,
+                       kw_join_any, kw_join_none);
   }
 
   /// Whether the token begins a block.
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -126,6 +126,9 @@
   // Targets that may appear inside a C# attribute.
   static const llvm::StringSet<> CSharpAttributeTargets;
 
+  /// Handle Verilog-specific tokens.
+  bool readRawTokenVerilogSpecific(Token &Tok);
+
   void readRawToken(FormatToken &Tok);
 
   void resetLexer(unsigned Offset);
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -983,6 +983,25 @@
     }
   }
 
+  if (Style.isVerilog()) {
+    // Verilog uses the backtick instead of the hash for preprocessor stuff.
+    // And it uses the hash for delays and parameter lists. In order to continue
+    // using `tok::hash` in other places, the backtick gets marked as the hash
+    // here. And in order to tell the backtick and hash apart for
+    // Verilog-specific stuff, the hash becomes an identifier.
+    if (FormatTok->isOneOf(tok::hash, tok::hashhash)) {
+      FormatTok->Tok.setKind(tok::raw_identifier);
+    } else if (FormatTok->is(tok::raw_identifier)) {
+      if (FormatTok->TokenText == "`") {
+        FormatTok->Tok.setIdentifierInfo(nullptr);
+        FormatTok->Tok.setKind(tok::hash);
+      } else if (FormatTok->TokenText == "``") {
+        FormatTok->Tok.setIdentifierInfo(nullptr);
+        FormatTok->Tok.setKind(tok::hashhash);
+      }
+    }
+  }
+
   FormatTok->WhitespaceRange = SourceRange(
       WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
 
@@ -1073,8 +1092,50 @@
   return FormatTok;
 }
 
+bool FormatTokenLexer::readRawTokenVerilogSpecific(Token &Tok) {
+  // In Verilog the quote is not a character literal.
+  //
+  // Make the backtick and double backtick identifiers to match against them
+  // more easily.
+  //
+  // In Verilog an escaped identifier starts with backslash and ends with
+  // whitespace. Unless that whitespace is an escaped newline. A backslash can
+  // also begin an escaped newline outside of an escaped identifier. We check
+  // for that outside of the Regex since we can't use negative lookahead
+  // assertions. Simply changing the '*' to '+' breaks stuff as the escaped
+  // identifier may have a length of 0 according to Section A.9.3.
+  // FIXME: If there is an escaped newline in the middle of an escaped
+  // identifier, allow for pasting the two lines together. But escaped
+  // identifiers usually occur only in generated code anyway.
+  static const llvm::Regex VerilogToken(
+      "^(\'|``?|\\\\(\\\\(\r?\n|\r)|[^[:space:]])*)");
+
+  SmallVector<StringRef, 4> Matches;
+  const char *Start = Lex->getBufferLocation();
+  if (!VerilogToken.match(StringRef(Start, Lex->getBuffer().end() - Start),
+                          &Matches))
+    return false;
+  // There is a null byte at the end of the buffer, so we don't have to check
+  // Start[1] is within the buffer.
+  if (Start[0] == '\\' && (Start[1] == '\r' || Start[1] == '\n'))
+    return false;
+  size_t Len = Matches[0].size();
+
+  Tok.setLength(Len);
+  Tok.setLocation(Lex->getSourceLocation(Start, Len));
+  // The kind has to be an identifier so we can match it against those defined
+  // in Keywords.
+  Tok.setKind(tok::raw_identifier);
+  Tok.setRawIdentifierData(Start);
+  Lex->skipOver(Len);
+  return true;
+}
+
 void FormatTokenLexer::readRawToken(FormatToken &Tok) {
-  Lex->LexFromRawLexer(Tok.Tok);
+  // For Verilog, first see if there is a special token, and fall back to the
+  // normal lexer if there isn't one.
+  if (!(Style.isVerilog() && readRawTokenVerilogSpecific(Tok.Tok)))
+    Lex->LexFromRawLexer(Tok.Tok);
   Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                             Tok.Tok.getLength());
   // For formatting, treat unterminated string literals like normal string
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1298,6 +1298,10 @@
     // sequence.
     if (!CurrentToken->Tok.getIdentifierInfo())
       return Type;
+    // In Verilog macro expansions start with a backtick just like preprocessor
+    // directives. Thus we stop if the word is not a preprocessor directive.
+    if (Style.isVerilog() && !Keywords.isVerilogPPDirective(*CurrentToken))
+      return LT_Invalid;
     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
     case tok::pp_include:
     case tok::pp_include_next:
@@ -1339,8 +1343,14 @@
     if (!CurrentToken)
       return LT_Invalid;
     NonTemplateLess.clear();
-    if (CurrentToken->is(tok::hash))
-      return parsePreprocessorDirective();
+    if (CurrentToken->is(tok::hash)) {
+      // We were not yet allowed to use C++17 optional when this was being
+      // written. So we used LT_Invalid to mark that the line is not a
+      // preprocessor directive.
+      auto Type = parsePreprocessorDirective();
+      if (Type != LT_Invalid)
+        return Type;
+    }
 
     // Directly allow to 'import <string-literal>' to support protocol buffer
     // definitions (github.com/google/protobuf) or missing "#" (either way we
@@ -3501,8 +3511,11 @@
   if (Left.Finalized)
     return Right.hasWhitespaceBefore();
 
-  if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
-    return true; // Never ever merge two identifiers.
+  // Never ever merge two words. getIdentifierInfo() returns non-null for
+  // keywords as well as identifiers.
+  if (Right.Tok.getIdentifierInfo() && !Keywords.isAdditionalSymbol(Right) &&
+      Left.Tok.getIdentifierInfo() && !Keywords.isAdditionalSymbol(Left))
+    return true;
 
   // Leave a space between * and /* to avoid C4138 `comment end` found outside
   // of comment.
@@ -3742,6 +3755,18 @@
                       Keywords.kw_native)) &&
         Right.is(TT_TemplateOpener))
       return true;
+  } else if (Style.isVerilog()) {
+    // Don't add space within a delay like `#0`.
+    if (!Left.is(TT_BinaryOperator) &&
+        Left.isOneOf(Keywords.verilogHash, Keywords.verilogHashHash))
+      return false;
+    // Add space after a delay.
+    if (!Right.is(tok::semi) &&
+        (Left.endsSequence(tok::numeric_constant, Keywords.verilogHash) ||
+         Left.endsSequence(tok::numeric_constant, Keywords.verilogHashHash) ||
+         (Left.is(tok::r_paren) && Left.MatchingParen &&
+          Left.MatchingParen->endsSequence(tok::l_paren, tok::at))))
+      return true;
   }
   if (Left.is(TT_ImplicitStringLiteral))
     return Right.hasWhitespaceBefore();
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1829,8 +1829,23 @@
         break;
 
       TokenCount = Line->Tokens.size();
-      if (TokenCount == 1 ||
-          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
+      // Determine whether the line might be a single macro expansion.
+      // In Verilog macro expansions begin with a backtick.
+      {
+        const UnwrappedLineNode *Tok = &Line->Tokens.front(),
+                                *End = Tok + TokenCount;
+        while (Tok != End && Tok->Tok->is(tok::comment))
+          ++Tok;
+        if (Style.isVerilog()) {
+          if (Tok != End && Tok->Tok->is(tok::hash))
+            ++Tok;
+          else
+            break;
+        }
+        if (End - Tok != 1)
+          break;
+      }
+
         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
           Line->Tokens.begin()->Tok->MustBreakBefore = true;
           parseLabel(!Style.IndentGotoLabels);
@@ -1855,7 +1870,6 @@
             PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
           addUnwrappedLine();
           return;
-        }
       }
       break;
     }
@@ -4156,6 +4170,8 @@
     PreviousWasComment = FormatTok->is(tok::comment);
 
     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
+           (!Style.isVerilog() ||
+            Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
            FirstNonCommentOnLine) {
       distributeComments(Comments, FormatTok);
       Comments.clear();
diff --git a/clang/unittests/Format/FormatTestVerilog.cpp b/clang/unittests/Format/FormatTestVerilog.cpp
--- a/clang/unittests/Format/FormatTestVerilog.cpp
+++ b/clang/unittests/Format/FormatTestVerilog.cpp
@@ -43,6 +43,30 @@
   }
 };
 
+TEST_F(FormatTestVerilog, Delay) {
+  // Delay by
the default unit. + verifyFormat("#0;"); + verifyFormat("#1;"); + verifyFormat("#10;"); + verifyFormat("#1.5;"); + // Explicit unit. + verifyFormat("#1fs;"); + verifyFormat("#1.5fs;"); + verifyFormat("#1ns;"); + verifyFormat("#1.5ns;"); + verifyFormat("#1us;"); + verifyFormat("#1.5us;"); + verifyFormat("#1ms;"); + verifyFormat("#1.5ms;"); + verifyFormat("#1s;"); + verifyFormat("#1.5s;"); + // The following expression should be on the same line. + verifyFormat("#1 x = x;"); + EXPECT_EQ("#1 x = x;", format("#1\n" + "x = x;", + getLLVMStyle(FormatStyle::LK_Verilog))); +} + TEST_F(FormatTestVerilog, If) { verifyFormat("if (x)\n" " x = x;"); @@ -114,5 +138,113 @@ " {x} = {x};"); } +TEST_F(FormatTestVerilog, Preprocessor) { + auto Style = getLLVMStyle(FormatStyle::LK_Verilog); + Style.ColumnLimit = 20; + + // Macro definitions. + EXPECT_EQ("`define X \\\n" + " if (x) \\\n" + " x = x;", + format("`define X if(x)x=x;", Style)); + EXPECT_EQ("`define X(x) \\\n" + " if (x) \\\n" + " x = x;", + format("`define X(x) if(x)x=x;", Style)); + EXPECT_EQ("`define X \\\n" + " x = x; \\\n" + " x = x;", + format("`define X x=x;x=x;", Style)); + // Macro definitions with invocations inside. + EXPECT_EQ("`define LIST \\\n" + " `ENTRY \\\n" + " `ENTRY", + format("`define LIST \\\n" + "`ENTRY \\\n" + "`ENTRY", + Style)); + EXPECT_EQ("`define LIST \\\n" + " `x = `x; \\\n" + " `x = `x;", + format("`define LIST \\\n" + "`x = `x; \\\n" + "`x = `x;", + Style)); + EXPECT_EQ("`define LIST \\\n" + " `x = `x; \\\n" + " `x = `x;", + format("`define LIST `x=`x;`x=`x;", Style)); + // Macro invocations. + verifyFormat("`x = (`x1 + `x2 + x);"); + // Lines starting with a preprocessor directive should not be indented. 
+ std::string Directives[] = { + "begin_keywords", + "celldefine", + "default_nettype", + "define", + "else", + "elsif", + "end_keywords", + "endcelldefine", + "endif", + "ifdef", + "ifndef", + "include", + "line", + "nounconnected_drive", + "pragma", + "resetall", + "timescale", + "unconnected_drive", + "undef", + "undefineall", + }; + for (auto &Name : Directives) { + EXPECT_EQ("if (x)\n" + "`" + + Name + + "\n" + " ;", + format("if (x)\n" + "`" + + Name + + "\n" + ";", + Style)); + } + // Lines starting with a regular macro invocation should be indented as a + // normal line. + EXPECT_EQ("if (x)\n" + " `x = `x;\n" + "`timescale 1ns / 1ps", + format("if (x)\n" + "`x = `x;\n" + "`timescale 1ns / 1ps", + Style)); + EXPECT_EQ("if (x)\n" + "`timescale 1ns / 1ps\n" + " `x = `x;", + format("if (x)\n" + "`timescale 1ns / 1ps\n" + "`x = `x;", + Style)); + std::string NonDirectives[] = { + // For `__FILE__` and `__LINE__`, although the standard classifies them as + // preprocessor directives, they are used like regular macros. + "__FILE__", "__LINE__", "elif", "foo", "x", + }; + for (auto &Name : NonDirectives) { + EXPECT_EQ("if (x)\n" + " `" + + Name + ";", + format("if (x)\n" + "`" + + Name + + "\n" + ";", + Style)); + } +} + } // namespace format } // end namespace clang