diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2745,6 +2745,23 @@ /// \version 3.7 std::string MacroBlockEnd; + /// A list of macros of the form \c <definition>=<expansion> . + /// + /// Code will be parsed with macros expanded, and formatting will try to best + /// match the structure of the expanded call. + /// + /// For example, with the macro "A(x)=x", the code + /// \code + /// A(a * b); + /// \endcode + /// will be formatted as a declaration of the variable \c b of type \c a* + /// (depending on pointer-binding rules) + /// \code + /// A(a* b); + /// \endcode + /// instead of as multiplication. + std::vector Macros; + /// The maximum number of consecutive empty lines to keep. /// \code /// MaxEmptyLinesToKeep: 1 vs. MaxEmptyLinesToKeep: 0 @@ -4306,7 +4323,8 @@ StatementAttributeLikeMacros == R.StatementAttributeLikeMacros && StatementMacros == R.StatementMacros && TabWidth == R.TabWidth && TypenameMacros == R.TypenameMacros && UseTab == R.UseTab && - WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros; + WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros && + Macros == R.Macros; } std::optional GetLanguageStyle(LanguageKind Language) const; diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -18,6 +18,7 @@ #include "WhitespaceManager.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Format/Format.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/Debug.h" @@ -739,9 +740,14 @@ if (Previous.is(TT_TemplateString) && Previous.opensScope()) CurrentState.NoLineBreak = true; + // Align following lines within parentheses / brackets if configured. 
+ // Note: This doesn't apply to macro expansion lines, which are MACRO( , , ) + // with args as children of the '(' and ',' tokens. It does not make sense to + // align the commas with the opening paren. if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() && Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) && + !(Current.MacroParent && Previous.MacroParent) && (Current.isNot(TT_LineComment) || Previous.is(BK_BracedInit))) { CurrentState.Indent = State.Column + Spaces; CurrentState.IsAligned = true; diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1036,6 +1036,7 @@ IO.mapOptional("UseTab", Style.UseTab); IO.mapOptional("WhitespaceSensitiveMacros", Style.WhitespaceSensitiveMacros); + IO.mapOptional("Macros", Style.Macros); // If AlwaysBreakAfterDefinitionReturnType was specified but // AlwaysBreakAfterReturnType was not, initialize the latter from the diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -373,6 +373,11 @@ /// binary operator. TokenType getType() const { return Type; } void setType(TokenType T) { + // If this token is a macro argument while formatting an unexpanded macro + // call, we do not change its type any more - the type was deduced from + // formatting the expanded macro stream already. 
+ if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg) + return; assert((!TypeIsFinalized || T == Type) && "Please use overwriteFixedType to change a fixed type."); Type = T; diff --git a/clang/lib/Format/MacroExpander.cpp b/clang/lib/Format/MacroExpander.cpp --- a/clang/lib/Format/MacroExpander.cpp +++ b/clang/lib/Format/MacroExpander.cpp @@ -145,6 +145,10 @@ } } +unsigned MacroExpander::getArity(llvm::StringRef Name) const { + return Definitions.find(Name)->second.Params.size(); +} + bool MacroExpander::defined(llvm::StringRef Name) const { return Definitions.find(Name) != Definitions.end(); } diff --git a/clang/lib/Format/Macros.h b/clang/lib/Format/Macros.h --- a/clang/lib/Format/Macros.h +++ b/clang/lib/Format/Macros.h @@ -113,6 +113,9 @@ /// subsequent parentheses. bool objectLike(llvm::StringRef Name) const; + /// Returns the arity of the macro \p Name. + unsigned getArity(llvm::StringRef Name) const; + /// Returns the expanded stream of format tokens for \p ID, where /// each element in \p Args is a positional argument to the macro call. llvm::SmallVector expand(FormatToken *ID, @@ -149,7 +152,7 @@ /// /// After this point, the state of the spelled/expanded stream is "in sync" /// (both at the start of an UnwrappedLine, with no macros open), so the -/// Unexpander can be thrown away and parsing can continue. +/// Reconstructor can be thrown away and parsing can continue. 
/// /// Given a mapping from the macro name identifier token in the macro call /// to the tokens of the macro call, for example: diff --git a/clang/lib/Format/TokenAnalyzer.h b/clang/lib/Format/TokenAnalyzer.h --- a/clang/lib/Format/TokenAnalyzer.h +++ b/clang/lib/Format/TokenAnalyzer.h @@ -46,7 +46,7 @@ FileID getFileID() const { return ID; } - const SourceManager &getSourceManager() const { return SM; } + SourceManager &getSourceManager() const { return SM; } ArrayRef getCharRanges() const { return CharRanges; } diff --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp --- a/clang/lib/Format/TokenAnalyzer.cpp +++ b/clang/lib/Format/TokenAnalyzer.cpp @@ -104,12 +104,12 @@ IdentifierTable IdentTable(getFormattingLangOpts(Style)); FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(), Env.getFirstStartColumn(), Style, Encoding, Allocator, - IdentTable); ArrayRef Toks(Lex.lex()); SmallVector Tokens(Toks.begin(), Toks.end()); - UnwrappedLineParser Parser(Style, Lex.getKeywords(), - Env.getFirstStartColumn(), Tokens, *this); + UnwrappedLineParser Parser(Env.getSourceManager(), Style, Lex.getKeywords(), + Env.getFirstStartColumn(), Tokens, *this, + Allocator, IdentTable); Parser.parse(); assert(UnwrappedLines.back().empty()); unsigned Penalty = 0; diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h --- a/clang/lib/Format/TokenAnnotator.h +++ b/clang/lib/Format/TokenAnnotator.h @@ -65,20 +65,32 @@ // left them in a different state. 
First->Previous = nullptr; FormatToken *Current = First; + addChildren(Line.Tokens.front(), Current); for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) { + if (Node.Tok->MacroParent) + ContainsMacroCall = true; Current->Next = Node.Tok; Node.Tok->Previous = Current; Current = Current->Next; - Current->Children.clear(); - for (const auto &Child : Node.Children) { - Children.push_back(new AnnotatedLine(Child)); - Current->Children.push_back(Children.back()); - } + addChildren(Node, Current); + // FIXME: if we add children, previous will point to the token before + // the children; changing this requires significant changes across + // clang-format. } Last = Current; Last->Next = nullptr; } + void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) { + Current->Children.clear(); + for (const auto &Child : Node.Children) { + Children.push_back(new AnnotatedLine(Child)); + if (Children.back()->ContainsMacroCall) + ContainsMacroCall = true; + Current->Children.push_back(Children.back()); + } + } + ~AnnotatedLine() { for (AnnotatedLine *Child : Children) delete Child; @@ -149,6 +161,9 @@ bool MightBeFunctionDecl; bool IsMultiVariableDeclStmt; + /// \c True if this line contains a macro call for which an expansion exists. + bool ContainsMacroCall = false; + /// \c True if this line should be formatted, i.e. intersects directly or /// indirectly with one of the input ranges. bool Affected; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2595,6 +2595,13 @@ // Consume operators with higher precedence. parse(Precedence + 1); + // Do not assign fake parenthesis to tokens that are part of an + // unexpanded macro call. The line within the macro call contains + // the parenthesis and commas, and we will not find operators within + // that structure. 
+ if (Current && Current->MacroParent) + break; + int CurrentPrecedence = getCurrentPrecedence(); if (Precedence == CurrentPrecedence && Current && @@ -4225,8 +4232,12 @@ Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) { return true; } - if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen)) + if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen) && + // In an unexpanded macro call we only find the parentheses and commas + // in a line; the commas and closing parenthesis do not require a space. + (Left.Children.empty() || !Left.MacroParent)) { return true; + } if (Right.is(tok::comma)) return false; if (Right.is(TT_ObjCBlockLParen)) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "UnwrappedLineFormatter.h" +#include "FormatToken.h" #include "NamespaceEndCommentsFixer.h" #include "WhitespaceManager.h" #include "llvm/Support/Debug.h" @@ -918,9 +919,22 @@ static void markFinalized(FormatToken *Tok) { for (; Tok; Tok = Tok->Next) { - Tok->Finalized = true; - for (AnnotatedLine *Child : Tok->Children) - markFinalized(Child->First); + if (Tok->MacroCtx && Tok->MacroCtx->Role == MR_ExpandedArg) { + // In the first pass we format all macro arguments in the expanded token + // stream. Instead of finalizing the macro arguments, we mark that they + // will be modified as unexpanded arguments (as part of the macro call + // formatting) in the next pass. + Tok->MacroCtx->Role = MR_UnexpandedArg; + // Reset whether spaces are required before this token, as that is context + // dependent, and that context may change when formatting the macro call. 
+ // For example, given M(x) -> 2 * x, and the macro call M(var), + // the token 'var' will have SpacesRequiredBefore = 1 after being + // formatted as part of the expanded macro, but SpacesRequiredBefore = 0 + // for its position within the macro call. + Tok->SpacesRequiredBefore = 0; + } else { + Tok->Finalized = true; + } } } @@ -975,15 +989,15 @@ bool formatChildren(LineState &State, bool NewLine, bool DryRun, unsigned &Penalty) { const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); + bool HasLBrace = LBrace && LBrace->is(tok::l_brace) && LBrace->is(BK_Block); FormatToken &Previous = *State.NextToken->Previous; - if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->isNot(BK_Block) || - Previous.Children.size() == 0) { + if (Previous.Children.size() == 0 || (!HasLBrace && !LBrace->MacroParent)) { // The previous token does not open a block. Nothing to do. We don't // assert so that we can simply call this function for all tokens. return true; } - if (NewLine) { + if (NewLine || Previous.MacroParent) { const ParenState &P = State.Stack.back(); int AdditionalIndent = @@ -1349,11 +1363,12 @@ NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); bool FitsIntoOneLine = - TheLine.Last->TotalLength + Indent <= ColumnLimit || - (TheLine.Type == LT_ImportStatement && - (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) || - (Style.isCSharp() && - TheLine.InPPDirective); // don't split #regions in C# + !TheLine.ContainsMacroCall && + (TheLine.Last->TotalLength + Indent <= ColumnLimit || + (TheLine.Type == LT_ImportStatement && + (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) || + (Style.isCSharp() && + TheLine.InPPDirective)); // don't split #regions in C# if (Style.ColumnLimit == 0) { NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) .formatLine(TheLine, NextStartColumn + Indent, diff --git a/clang/lib/Format/UnwrappedLineParser.h 
b/clang/lib/Format/UnwrappedLineParser.h --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -15,10 +15,14 @@ #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H +#include "Encoding.h" #include "FormatToken.h" +#include "Macros.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Format/Format.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/Regex.h" #include #include @@ -76,6 +80,18 @@ unsigned FirstStartColumn = 0; }; +/// Interface for users of the UnwrappedLineParser to receive the parsed lines. +/// Parsing a single snippet of code can lead to multiple runs, where each +/// run is a coherent view of the file. +/// For example, different runs are generated: +/// - for different combinations of #if blocks +/// - for code where macros are expanded and the code with the original +/// macro calls. +/// Some tokens will only be visible in a subset of the runs. +/// For each run, \c UnwrappedLineParser will call \c consumeUnwrappedLine +/// for each parsed unwrapped line, and then \c finishRun to indicate +/// that the set of unwrapped lines before is one coherent view of the +/// code snippet to be formatted. 
class UnwrappedLineConsumer { public: virtual ~UnwrappedLineConsumer() {} @@ -87,10 +103,12 @@ class UnwrappedLineParser { public: - UnwrappedLineParser(const FormatStyle &Style, + UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef Tokens, - UnwrappedLineConsumer &Callback); + UnwrappedLineConsumer &Callback, + llvm::SpecificBumpPtrAllocator &Allocator, + IdentifierTable &IdentTable); void parse(); @@ -193,6 +211,7 @@ unsigned parseVerilogHierarchyHeader(); void parseVerilogTable(); void parseVerilogCaseLabel(); + llvm::SmallVector, 1> parseMacroCall(); // Used by addUnwrappedLine to denote whether to keep or remove a level // when resetting the line state. @@ -236,16 +255,49 @@ bool isOnNewLine(const FormatToken &FormatTok); + // Returns whether there is a macro expansion in the line, i.e. a token that + // was expanded from a macro call. + bool containsExpansion(const UnwrappedLine &Line); + // Compute hash of the current preprocessor branch. // This is used to identify the different branches, and thus track if block // open and close in the same branch. size_t computePPHash() const; + bool parsingPPDirective() { return CurrentLines != &Lines; } + // FIXME: We are constantly running into bugs where Line.Level is incorrectly // subtracted from beyond 0. Introduce a method to subtract from Line.Level // and use that everywhere in the Parser. std::unique_ptr Line; + // Lines that are created by macro expansion. + // When formatting code containing macro calls, we first format the expanded + // lines to set the token types correctly. Afterwards, we format the + // reconstructed macro calls, re-using the token types determined in the first + // step. + // CurrentExpandedLines is stored in ExpandedLines and cleared every time the 
+ // reconstruction of a line containing macro calls has finished. + SmallVector CurrentExpandedLines; + + // Maps from the first token of a top-level UnwrappedLine that contains + // a macro call to the replacement UnwrappedLines expanded from the macro + // call. + llvm::DenseMap> ExpandedLines; + + // Map from the macro identifier to a line containing the full unexpanded + // macro call. + llvm::DenseMap> Unexpanded; + + // For recursive macro expansions, trigger reconstruction only on the + // outermost expansion. + bool InExpansion = false; + + // Set while we reconstruct a macro call. + // For reconstruction, we feed the expanded lines into the reconstructor + // until it is finished. + std::optional Reconstruct; + // Comments are sorted into unwrapped lines by whether they are in the same // line as the previous token, or not. If not, they belong to the next token. // Since the next token might already be in a new unwrapped line, we need to @@ -345,13 +397,17 @@ // does not start at the beginning of the file. unsigned FirstStartColumn; + MacroExpander Macros; + friend class ScopedLineState; friend class CompoundStatementIndenter; }; struct UnwrappedLineNode { UnwrappedLineNode() : Tok(nullptr) {} - UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {} + UnwrappedLineNode(FormatToken *Tok, + llvm::ArrayRef Children = {}) + : Tok(Tok), Children(Children.begin(), Children.end()) {} FormatToken *Tok; SmallVector Children; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -14,11 +14,15 @@ #include "UnwrappedLineParser.h" #include "FormatToken.h" +#include "FormatTokenLexer.h" #include "FormatTokenSource.h" +#include "Macros.h" #include "TokenAnnotator.h" #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_os_ostream.h" #include "llvm/Support/raw_ostream.h" #include @@ -143,11 +147,12 
@@ unsigned OldLineLevel; }; -UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, - const AdditionalKeywords &Keywords, - unsigned FirstStartColumn, - ArrayRef Tokens, - UnwrappedLineConsumer &Callback) +UnwrappedLineParser::UnwrappedLineParser( + SourceManager &SourceMgr, const FormatStyle &Style, + const AdditionalKeywords &Keywords, unsigned FirstStartColumn, + ArrayRef Tokens, UnwrappedLineConsumer &Callback, + llvm::SpecificBumpPtrAllocator &Allocator, + IdentifierTable &IdentTable) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), CurrentLines(&Lines), Style(Style), Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), @@ -155,7 +160,8 @@ IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None ? IG_Rejected : IG_Inited), - IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} + IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), + Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; @@ -173,6 +179,15 @@ NestedTooDeep.clear(); PPStack.clear(); Line->FirstStartColumn = FirstStartColumn; + + if (!Unexpanded.empty()) + for (FormatToken *Token : AllTokens) + Token->MacroCtx.reset(); + CurrentExpandedLines.clear(); + ExpandedLines.clear(); + Unexpanded.clear(); + InExpansion = false; + Reconstruct.reset(); } void UnwrappedLineParser::parse() { @@ -196,12 +211,36 @@ } // Create line with eof token. + assert(FormatTok->is(tok::eof)); pushToken(FormatTok); addUnwrappedLine(); - for (const UnwrappedLine &Line : Lines) - Callback.consumeUnwrappedLine(Line); + // In a first run, format everything with the lines containing macro calls + // replaced by the expansion. 
+ if (!ExpandedLines.empty()) { + LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); + for (const auto &Line : Lines) { + if (!Line.Tokens.empty()) { + auto it = ExpandedLines.find(Line.Tokens.begin()->Tok); + if (it != ExpandedLines.end()) { + for (const auto &Expanded : it->second) { + LLVM_DEBUG(printDebugInfo(Expanded)); + Callback.consumeUnwrappedLine(Expanded); + } + continue; + } + } + LLVM_DEBUG(printDebugInfo(Line)); + Callback.consumeUnwrappedLine(Line); + } + Callback.finishRun(); + } + LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); + for (const UnwrappedLine &Line : Lines) { + LLVM_DEBUG(printDebugInfo(Line)); + Callback.consumeUnwrappedLine(Line); + } Callback.finishRun(); Lines.clear(); while (!PPLevelBranchIndex.empty() && @@ -724,7 +763,7 @@ parseParens(); size_t NbPreprocessorDirectives = - CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; + !parsingPPDirective() ? PreprocessorDirectives.size() : 0; addUnwrappedLine(); size_t OpeningLineIndex = CurrentLines->empty() @@ -4152,12 +4191,25 @@ Line->Level = OrigLevel; } +bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) { + for (const auto &N : Line.Tokens) { + if (N.Tok->MacroCtx) + return true; + for (const UnwrappedLine &Child : N.Children) + if (containsExpansion(Child)) + return true; + } + return false; +} + void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { if (Line->Tokens.empty()) return; LLVM_DEBUG({ - if (CurrentLines == &Lines) + if (!parsingPPDirective()) { + llvm::dbgs() << "Adding unwrapped line:\n"; printDebugInfo(*Line); + } }); // If this line closes a block when in Whitesmiths mode, remember that @@ -4168,7 +4220,39 @@ Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; - CurrentLines->push_back(std::move(*Line)); + // If the current line was expanded from a macro call, we use it to + // reconstruct an unwrapped line from the structure of the expanded unwrapped 
+ // line and the unexpanded token stream. + if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { + if (!Reconstruct) + Reconstruct.emplace(Line->Level, Unexpanded); + Reconstruct->addLine(*Line); + + // While the reconstructed unexpanded lines are stored in the normal + // flow of lines, the expanded lines are stored on the side to be analyzed + // in an extra step. + CurrentExpandedLines.push_back(std::move(*Line)); + + if (Reconstruct->finished()) { + UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); + assert(!Reconstructed.Tokens.empty() && + "Reconstructed must at least contain the macro identifier."); + assert(!parsingPPDirective()); + LLVM_DEBUG({ + llvm::dbgs() << "Adding unexpanded line:\n"; + printDebugInfo(Reconstructed); + }); + ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; + Lines.push_back(std::move(Reconstructed)); + CurrentExpandedLines.clear(); + Reconstruct.reset(); + } + } else { + // At the top level we only get here when no unexpansion is going on, or + // when conditional formatting led to unfinished macro reconstructions. 
+ assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); + CurrentLines->push_back(std::move(*Line)); + } Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; Line->FirstStartColumn = 0; @@ -4176,7 +4260,7 @@ if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) --Line->Level; - if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { + if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), std::make_move_iterator(PreprocessorDirectives.end())); @@ -4470,6 +4554,56 @@ continue; } + if (FormatTok->is(tok::identifier) && + Macros.defined(FormatTok->TokenText) && + (Macros.objectLike(FormatTok->TokenText) || + Tokens->peekNextToken()->is(tok::l_paren)) && + // FIXME: Allow expanding macros in preprocessor directives. + !Line->InPPDirective) { + FormatToken *ID = FormatTok; + + // To correctly parse the code, we need to replace the tokens of the macro + // call with its expansion. + auto PreCall = std::move(Line); + Line.reset(new UnwrappedLine); + bool OldInExpansion = InExpansion; + InExpansion = true; + // We parse the macro call into a new line. + auto Args = parseMacroCall(); + LLVM_DEBUG({ + if (Args.size() != Macros.getArity(ID->TokenText)) { + llvm::dbgs() << "Macro " << ID->TokenText << " takes " + << Macros.getArity(ID->TokenText) + << " arguments, but got " << Args.size() << "\n"; + } + }); + InExpansion = OldInExpansion; + assert(Line->Tokens.front().Tok == ID); + // And remember the unexpanded macro call tokens. + Unexpanded[ID] = std::move(Line); + + // Next, we insert the expanded tokens in the token stream at the current + // position, and continue parsing. 
+ Line = std::move(PreCall); + SmallVector New = Macros.expand(ID, Args); + if (!New.empty()) + FormatTok = Tokens->insertTokens(New); + + LLVM_DEBUG({ + llvm::dbgs() << "Call: " << ID->TokenText << "("; + for (const auto &Arg : Args) + for (const auto &T : Arg) + llvm::dbgs() << T->TokenText << " "; + llvm::dbgs() << ")\n"; + }); + LLVM_DEBUG({ + llvm::dbgs() << "Expanded: "; + for (const auto &T : New) + llvm::dbgs() << T->TokenText << " "; + llvm::dbgs() << "\n"; + }); + } + if (!FormatTok->is(tok::comment)) { distributeComments(Comments, FormatTok); Comments.clear(); @@ -4483,6 +4617,67 @@ Comments.clear(); } +namespace { +template +void pushTokens(Iterator Begin, Iterator End, + llvm::SmallVectorImpl &Into) { + for (auto I = Begin; I != End; ++I) { + Into.push_back(I->Tok); + for (const auto &Child : I->Children) + pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); + } +} +} // namespace + +llvm::SmallVector, 1> +UnwrappedLineParser::parseMacroCall() { + llvm::SmallVector, 1> Args; + assert(Line->Tokens.empty()); + FormatToken *ID = FormatTok; + nextToken(); + if (Macros.objectLike(ID->TokenText)) + return Args; + assert(FormatTok->is(tok::l_paren)); + nextToken(); + auto ArgStart = std::prev(Line->Tokens.end()); + + int Parens = 0; + do { + switch (FormatTok->Tok.getKind()) { + case tok::l_paren: + ++Parens; + nextToken(); + break; + case tok::r_paren: { + if (Parens > 0) { + --Parens; + nextToken(); + break; + } + Args.push_back({}); + pushTokens(std::next(ArgStart), Line->Tokens.end(), Args.back()); + nextToken(); + return Args; + } + case tok::comma: { + if (Parens > 0) { + nextToken(); + break; + } + Args.push_back({}); + pushTokens(std::next(ArgStart), Line->Tokens.end(), Args.back()); + nextToken(); + ArgStart = std::prev(Line->Tokens.end()); + break; + } + default: + nextToken(); + break; + } + } while (!eof()); + return {}; +} + void UnwrappedLineParser::pushToken(FormatToken *Tok) { Line->Tokens.push_back(UnwrappedLineNode(Tok)); if 
(MustBreakBeforeNextToken) { diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -49,7 +49,7 @@ unsigned Spaces, unsigned StartOfTokenColumn, bool IsAligned, bool InPPDirective) { - if (Tok.Finalized) + if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg)) return; Tok.setDecision((Newlines > 0) ? FD_Break : FD_Continue); Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange, @@ -60,7 +60,7 @@ void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, bool InPPDirective) { - if (Tok.Finalized) + if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg)) return; Changes.push_back(Change(Tok, /*CreateReplacement=*/false, Tok.WhitespaceRange, /*Spaces=*/0, @@ -84,7 +84,7 @@ const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces) { - if (Tok.Finalized) + if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg)) return; SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); Changes.push_back( diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -22568,6 +22568,230 @@ "aaaallvm::outs()\n <<"); } +TEST_F(FormatTest, UnexpandConfiguredMacros) { + FormatStyle Style = getLLVMStyle(); + Style.Macros.push_back("CLASS=class C {"); + Style.Macros.push_back("SEMI=;"); + Style.Macros.push_back("STMT=f();"); + Style.Macros.push_back("ID(x)=x"); + Style.Macros.push_back("ID3(x, y, z)=x y z"); + Style.Macros.push_back("CALL(x)=f([] { x })"); + Style.Macros.push_back("ASSIGN_OR_RETURN(a, b, c)=a = (b) || (c)"); + Style.Macros.push_back("MOCK_METHOD(r, n, a, s)=r n a s"); + + verifyFormat("ID(nested(a(b, 
c), d))", Style); + verifyFormat("CLASS\n" + " a *b;\n" + "};", + Style); + verifyFormat("SEMI\n" + "SEMI\n" + "SEMI", + Style); + verifyFormat("STMT\n" + "STMT\n" + "STMT", + Style); + verifyFormat("void f() { ID(a *b); }", Style); + verifyFormat(R"(ID( + { ID(a *b); }); +)", + Style); + verifyIncompleteFormat(R"(ID3({, ID(a *b), + ; + }); +)", + Style); + + verifyFormat("ID(CALL(CALL(return a * b;)));", Style); + + verifyFormat("ASSIGN_OR_RETURN(MySomewhatLongType *variable,\n" + " MySomewhatLongFunction(SomethingElse()));\n", + Style); + + verifyFormat(R"( +#define MACRO(a, b) ID(a + b) +)", + Style); + EXPECT_EQ(R"( +int a; +int b; +int c; +int d; +int e; +int f; +ID( + namespace foo { + int a; + } +) // namespace k +)", + format(R"( +int a; +int b; +int c; +int d; +int e; +int f; +ID(namespace foo { int a; }) // namespace k +)", + Style)); + verifyFormat(R"(ID( + // + ({ ; })) +)", + Style); + + Style.ColumnLimit = 35; + // FIXME: Arbitrary formatting of macros where the end of the logical + // line is in the middle of a macro call are not working yet. + verifyFormat(R"(ID( + void f(); + void) +ID(g) ID(()) ID( + ; + void g();) +)", + Style); + + Style.ColumnLimit = 10; + verifyFormat("STMT\n" + "STMT\n" + "STMT", + Style); + + EXPECT_EQ(R"( +ID(CALL(CALL( + a *b))); +)", + format(R"( +ID(CALL(CALL(a * b))); +)", + Style)); + + // FIXME: If we want to support unbalanced braces or parens from macro + // expansions we need to re-think how we propagate errors in + // TokenAnnotator::parseLine; for investigation, switching the inner loop of + // TokenAnnotator::parseLine to return LT_Other instead of LT_Invalid in case + // of !consumeToken() changes the formatting of the test below and makes it + // believe it has a fully correct formatting. 
+ EXPECT_EQ(R"( +ID3( + { + CLASS + a *b; + }; + }, + ID(x *y); + , + STMT + STMT + STMT) +void f(); +)", + format(R"( +ID3({CLASS a*b; };}, ID(x*y);, STMT STMT STMT) +void f(); +)", + Style)); + + verifyFormat("ID(a(\n" + "#ifdef A\n" + " b, c\n" + "#else\n" + " d(e)\n" + "#endif\n" + " ))", + Style); + Style.ColumnLimit = 80; + verifyFormat(R"(ASSIGN_OR_RETURN( + // Comment + a b, c); +)", + Style); + Style.ColumnLimit = 30; + verifyFormat(R"(ASSIGN_OR_RETURN( + // Comment + // + a b, + xxxxxxxxxxxx( + yyyyyyyyyyyyyyyyy, + zzzzzzzzzzzzzzzzzz), + f([]() { + a(); + b(); + })); +)", + Style); + verifyFormat(R"(int a = []() { + ID( + x; + y; + z;) + ; +}(); +)", + Style); + EXPECT_EQ( + R"(ASSIGN_OR_RETURN(( +==== +#)) +})", + format(R"(ASSIGN_OR_RETURN(( +==== +#)) +})", + Style, SC_ExpectIncomplete)); + EXPECT_EQ(R"(ASSIGN_OR_RETURN( +} +( +==== +#), +a))", + format(R"(ASSIGN_OR_RETURN( +} +( +==== +#), +a))", + Style, SC_ExpectIncomplete)); + EXPECT_EQ(R"(ASSIGN_OR_RETURN(a +// +==== +# + <))", + format(R"(ASSIGN_OR_RETURN(a +// +==== +# + <))", + Style)); + verifyFormat("class C {\n" + " MOCK_METHOD(R, f,\n" + " (a *b, c *d),\n" + " (override));\n" + "};", + Style); +} + +TEST_F(FormatTest, KeepParensWhenExpandingObjectLikeMacros) { + FormatStyle Style = getLLVMStyle(); + Style.Macros.push_back("FN=class C { int f"); + verifyFormat("void f() { FN(a *b);\n" + " };\n" + "}", + Style); +} + +TEST_F(FormatTest, DoesNotExpandFunctionLikeMacrosWithoutParens) { + FormatStyle Style = getLLVMStyle(); + Style.Macros.push_back("CLASS()=class C {"); + verifyFormat("CLASS void f();\n" + "}\n" + ";", + Style); +} + TEST_F(FormatTest, HandleUnbalancedImplicitBracesAcrossPPBranches) { std::string code = "#if A\n" "#if B\n" diff --git a/clang/unittests/Format/TestLexer.h b/clang/unittests/Format/TestLexer.h --- a/clang/unittests/Format/TestLexer.h +++ b/clang/unittests/Format/TestLexer.h @@ -72,7 +72,8 @@ TokenList annotate(llvm::StringRef Code) { FormatTokenLexer Lex = 
getNewLexer(Code); auto Tokens = Lex.lex(); - UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this); + UnwrappedLineParser Parser(SourceMgr.get(), Style, Lex.getKeywords(), 0, + Tokens, *this, Allocator, IdentTable); Parser.parse(); TokenAnnotator Annotator(Style, Lex.getKeywords()); for (auto &Line : UnwrappedLines) {