diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -135,6 +135,8 @@ TYPE(CSharpGenericTypeConstraint) \ TYPE(CSharpGenericTypeConstraintColon) \ TYPE(CSharpGenericTypeConstraintComma) \ + TYPE(VerilogNumberBase) /* for the base in a number literal, not including \ + the quote */ \ TYPE(Unknown) /// Determines the semantic type of a syntactic token, e.g. whether "<" is a @@ -368,6 +370,9 @@ } bool isTypeFinalized() const { return TypeIsFinalized; } + /// Used to set an operator precedence explicitly. + prec::Level ForcedPrecedence = prec::Unknown; + /// The number of newlines immediately before the \c Token. /// /// This can be used to determine what the user wrote in the original code @@ -688,6 +693,8 @@ } prec::Level getPrecedence() const { + if (ForcedPrecedence != prec::Unknown) + return ForcedPrecedence; return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, /*CPlusPlus11=*/true); } @@ -1110,6 +1117,7 @@ // Symbols that are treated as keywords. verilogHash = &IdentTable.get("#"); verilogHashHash = &IdentTable.get("##"); + quote = &IdentTable.get("\'"); // Keep this at the end of the constructor to make sure everything here // is @@ -1409,11 +1417,14 @@ IdentifierInfo *verilogHash; IdentifierInfo *verilogHashHash; + // Symbols in Verilog that don't exist in C++. + IdentifierInfo *quote; + /// Returns \c true if \p Tok is a keyword or an identifier. bool isWordLike(const FormatToken &Tok) const { // getIdentifierinfo returns non-null for keywords as well as identifiers. 
return Tok.Tok.getIdentifierInfo() != nullptr && - !Tok.isOneOf(verilogHash, verilogHashHash); + !Tok.isOneOf(verilogHash, verilogHashHash, quote); } /// Returns \c true if \p Tok is a true JavaScript identifier, returns @@ -1542,6 +1553,11 @@ } } + bool isVerilogWordOperator(const FormatToken &Tok) const { + return Tok.isOneOf(kw_before, kw_intersect, kw_dist, kw_iff, kw_inside, + kw_with); + } + bool isVerilogIdentifier(const FormatToken &Tok) const { switch (Tok.Tok.getKind()) { case tok::kw_case: diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -60,7 +60,14 @@ bool tryMergeForEach(); bool tryTransformTryUsageForC(); + // Merge the most recently lexed tokens into a single token if their kinds are + // correct. bool tryMergeTokens(ArrayRef Kinds, TokenType NewType); + // Merge without checking their kinds. + bool tryMergeTokens(size_t Count, TokenType NewType); + // Merge if their kinds match any one of Kinds. + bool tryMergeTokensAny(ArrayRef> Kinds, + TokenType NewType); // Returns \c true if \p Tok can only be followed by an operand in JavaScript. bool precedesOperand(FormatToken *Tok); diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -193,6 +193,75 @@ if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator)) return; } + + if (Style.isVerilog()) { + // Merge the number following a base like `'h?a0`. + if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) && + Tokens.end()[-2]->is(tok::numeric_constant) && + Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier, + tok::question) && + tryMergeTokens(2, TT_Unknown)) + return; + // Part select. + if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}}, + TT_BitFieldColon)) + return; + // Xnor. 
The combined token is treated as a caret which can also be either a + // unary or binary operator. The actual type is determined in + // TokenAnnotator. We also check the token length so we know it is not + // already a merged token. + if (Tokens.back()->TokenText.size() == 1 && + tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}}, + TT_BinaryOperator)) { + Tokens.back()->Tok.setKind(tok::caret); + return; + } + // Signed shift and distribution weight. + if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) { + Tokens.back()->Tok.setKind(tok::lessless); + return; + } + if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) { + Tokens.back()->Tok.setKind(tok::greatergreater); + return; + } + if (tryMergeTokensAny({{tok::lessless, tok::equal}, + {tok::lessless, tok::lessequal}, + {tok::greatergreater, tok::equal}, + {tok::greatergreater, tok::greaterequal}, + {tok::colon, tok::equal}, + {tok::colon, tok::slash}}, + TT_BinaryOperator)) { + Tokens.back()->ForcedPrecedence = prec::Assignment; + return; + } + // Exponentiation, signed shift, case equality, and wildcard equality. + if (tryMergeTokensAny({{tok::star, tok::star}, + {tok::lessless, tok::less}, + {tok::greatergreater, tok::greater}, + {tok::exclaimequal, tok::equal}, + {tok::exclaimequal, tok::question}, + {tok::equalequal, tok::equal}, + {tok::equalequal, tok::question}}, + TT_BinaryOperator)) + return; + // Module paths in specify blocks and implications in properties. 
+ if (tryMergeTokensAny({{tok::plusequal, tok::greater}, + {tok::plus, tok::star, tok::greater}, + {tok::minusequal, tok::greater}, + {tok::minus, tok::star, tok::greater}, + {tok::less, tok::arrow}, + {tok::equal, tok::greater}, + {tok::star, tok::greater}, + {tok::pipeequal, tok::greater}, + {tok::pipe, tok::arrow}, + {tok::hash, tok::minus, tok::hash}, + {tok::hash, tok::equal, tok::hash}}, + TT_BinaryOperator)) { + Tokens.back()->ForcedPrecedence = prec::Comma; + return; + } + } } bool FormatTokenLexer::tryMergeNSStringLiteral() { @@ -461,15 +530,28 @@ SmallVectorImpl::const_iterator First = Tokens.end() - Kinds.size(); - if (!First[0]->is(Kinds[0])) + for (unsigned i = 0; i < Kinds.size(); ++i) + if (!First[i]->is(Kinds[i])) + return false; + + return tryMergeTokens(Kinds.size(), NewType); +} + +bool FormatTokenLexer::tryMergeTokens(size_t Count, TokenType NewType) { + if (Tokens.size() < Count) return false; + + SmallVectorImpl::const_iterator First = Tokens.end() - Count; unsigned AddLength = 0; - for (unsigned i = 1; i < Kinds.size(); ++i) { - if (!First[i]->is(Kinds[i]) || First[i]->hasWhitespaceBefore()) + for (size_t i = 1; i < Count; ++i) { + // If there is whitespace separating the token and the previous one, + // they should not be merged. + if (First[i]->hasWhitespaceBefore()) return false; AddLength += First[i]->TokenText.size(); } - Tokens.resize(Tokens.size() - Kinds.size() + 1); + + Tokens.resize(Tokens.size() - Count + 1); First[0]->TokenText = StringRef(First[0]->TokenText.data(), First[0]->TokenText.size() + AddLength); First[0]->ColumnWidth += AddLength; @@ -477,6 +559,14 @@ return true; } +bool FormatTokenLexer::tryMergeTokensAny( + ArrayRef> Kinds, TokenType NewType) { + return std::any_of(Kinds.begin(), Kinds.end(), + [this, NewType](ArrayRef Kinds) { + return tryMergeTokens(Kinds, NewType); + }); +} + // Returns \c true if \p Tok can only be followed by an operand in JavaScript. 
bool FormatTokenLexer::precedesOperand(FormatToken *Tok) { // NB: This is not entirely correct, as an r_paren can introduce an operand @@ -988,12 +1078,19 @@ } if (Style.isVerilog()) { + static const llvm::Regex NumberBase("^s?[bdho]", llvm::Regex::IgnoreCase); + SmallVector Matches; // Verilog uses the backtick instead of the hash for preprocessor stuff. // And it uses the hash for delays and parameter lists. In order to continue // using `tok::hash` in other places, the backtick gets marked as the hash // here. And in order to tell the backtick and hash apart for // Verilog-specific stuff, the hash becomes an identifier. - if (FormatTok->isOneOf(tok::hash, tok::hashhash)) { + if (FormatTok->is(tok::numeric_constant)) { + // In Verilog the quote is not part of a number. + auto Quote = FormatTok->TokenText.find('\''); + if (Quote != StringRef::npos) + resizeToken(Quote); + } else if (FormatTok->isOneOf(tok::hash, tok::hashhash)) { FormatTok->Tok.setKind(tok::raw_identifier); } else if (FormatTok->is(tok::raw_identifier)) { if (FormatTok->TokenText == "`") { @@ -1002,6 +1099,14 @@ } else if (FormatTok->TokenText == "``") { FormatTok->Tok.setIdentifierInfo(nullptr); FormatTok->Tok.setKind(tok::hashhash); + } else if (Tokens.size() != 0 && Tokens.back()->is(Keywords.quote) && + NumberBase.match(FormatTok->TokenText, &Matches)) { + // In Verilog, in a based number literal like `'b10`, there may be + // whitespace between `'b` and `10`. Therefore we handle the base and + // the rest of the number literal as two tokens. But if there is no + // space in the input code, we need to manually separate the two parts. 
+ resizeToken(Matches[0].size()); + FormatTok->setFinalizedType(TT_VerilogNumberBase); } } } @@ -1044,6 +1149,12 @@ StateStack.push(LexerState::TOKEN_STASHED); } + if (Style.isVerilog() && Tokens.size() != 0 && + Tokens.back()->is(TT_VerilogNumberBase) && + FormatTok->Tok.isOneOf(tok::identifier, tok::question)) + // Mark the number following a base like `'h?a0` as a number. + FormatTok->Tok.setKind(tok::numeric_constant); + // Now FormatTok is the next non-whitespace token. StringRef Text = FormatTok->TokenText; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1841,7 +1841,8 @@ Current, Contexts.back().CanBeExpression && Contexts.back().IsExpression, Contexts.back().ContextType == Context::TemplateArgument)); - } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { + } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret) || + (Style.isVerilog() && Current.is(tok::pipe))) { Current.setType(determinePlusMinusCaretUsage(Current)); if (Current.is(TT_UnaryOperator) && Current.is(tok::caret)) Contexts.back().CaretFound = true; @@ -3954,6 +3955,21 @@ (Left.is(tok::r_paren) && Left.MatchingParen && Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) return true; + // Don't add embedded spaces in a number literal like `16'h1?ax` or an array + // literal like `'{}`. + if (Left.is(Keywords.quote) || + (Left.is(TT_VerilogNumberBase) && Right.is(tok::numeric_constant))) + return false; + // Don't add spaces between a casting type and the quote or repetition count + // and the brace. 
+ if ((Right.is(Keywords.quote) || + (Right.is(BK_BracedInit) && Right.is(tok::l_brace))) && + !(Left.isOneOf(Keywords.kw_assign, Keywords.kw_unique) || + Keywords.isVerilogWordOperator(Left)) && + (Left.isOneOf(tok::r_square, tok::r_paren, tok::r_brace, + tok::numeric_constant) || + Keywords.isWordLike(Left))) + return false; } if (Left.is(TT_ImplicitStringLiteral)) return Right.hasWhitespaceBefore(); diff --git a/clang/unittests/Format/FormatTestVerilog.cpp b/clang/unittests/Format/FormatTestVerilog.cpp --- a/clang/unittests/Format/FormatTestVerilog.cpp +++ b/clang/unittests/Format/FormatTestVerilog.cpp @@ -45,6 +45,27 @@ } }; +TEST_F(FormatTestVerilog, BasedLiteral) { + verifyFormat("x = '0;"); + verifyFormat("x = '1;"); + verifyFormat("x = 'X;"); + verifyFormat("x = 'x;"); + verifyFormat("x = 'Z;"); + verifyFormat("x = 'z;"); + verifyFormat("x = 659;"); + verifyFormat("x = 'h837ff;"); + verifyFormat("x = 'o7460;"); + verifyFormat("x = 4'b1001;"); + verifyFormat("x = 5'D3;"); + verifyFormat("x = 3'b01x;"); + verifyFormat("x = 12'hx;"); + verifyFormat("x = 16'hz;"); + verifyFormat("x = -8'd6;"); + verifyFormat("x = 4'shf;"); + verifyFormat("x = -4'sd15;"); + verifyFormat("x = 16'sd?;"); +} + TEST_F(FormatTestVerilog, Delay) { // Delay by the default unit. verifyFormat("#0;"); @@ -139,6 +160,64 @@ " {x} = {x};"); } +TEST_F(FormatTestVerilog, Operators) { + // Test that unary operators are not followed by space. + verifyFormat("x = +x;"); + verifyFormat("x = -x;"); + verifyFormat("x = !x;"); + verifyFormat("x = ~x;"); + verifyFormat("x = &x;"); + verifyFormat("x = ~&x;"); + verifyFormat("x = |x;"); + verifyFormat("x = ~|x;"); + verifyFormat("x = ^x;"); + verifyFormat("x = ~^x;"); + verifyFormat("x = ^~x;"); + verifyFormat("x = ++x;"); + verifyFormat("x = --x;"); + + // Test that operators don't get split. 
+ verifyFormat("x = x++;"); + verifyFormat("x = x--;"); + verifyFormat("x = x ** x;"); + verifyFormat("x = x << x;"); + verifyFormat("x = x >> x;"); + verifyFormat("x = x <<< x;"); + verifyFormat("x = x >>> x;"); + verifyFormat("x = x <= x;"); + verifyFormat("x = x >= x;"); + verifyFormat("x = x == x;"); + verifyFormat("x = x != x;"); + verifyFormat("x = x === x;"); + verifyFormat("x = x !== x;"); + verifyFormat("x = x ==? x;"); + verifyFormat("x = x !=? x;"); + verifyFormat("x = x ~^ x;"); + verifyFormat("x = x ^~ x;"); + verifyFormat("x = x && x;"); + verifyFormat("x = x || x;"); + verifyFormat("x = x->x;"); + verifyFormat("x = x <-> x;"); + verifyFormat("x += x;"); + verifyFormat("x -= x;"); + verifyFormat("x *= x;"); + verifyFormat("x /= x;"); + verifyFormat("x %= x;"); + verifyFormat("x &= x;"); + verifyFormat("x ^= x;"); + verifyFormat("x |= x;"); + verifyFormat("x <<= x;"); + verifyFormat("x >>= x;"); + verifyFormat("x <<<= x;"); + verifyFormat("x >>>= x;"); + verifyFormat("x <= x;"); + + // Test that space is added between operators. + EXPECT_EQ("x = x < -x;", format("x=x<-x;")); + EXPECT_EQ("x = x << -x;", format("x=x<<-x;")); + EXPECT_EQ("x = x <<< -x;", format("x=x<<<-x;")); +} + TEST_F(FormatTestVerilog, Preprocessor) { auto Style = getLLVMStyle(FormatStyle::LK_Verilog); Style.ColumnLimit = 20;