diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2745,6 +2745,23 @@ /// \version 3.7 std::string MacroBlockEnd; + /// A list of macros of the form \c <definition>=<expansion> . + /// + /// Code will be parsed with macros expanded, and formatting will try to best + /// match the structure of the expanded call. + /// + /// For example, with the macro "A(x)=x", the code + /// \code + /// A(a * b); + /// \endcode + /// will be formatted as a declaration of the variable \c b of type \c A* + /// (depending on pointer-binding rules) + /// \code + /// A(a* b); + /// \endcode + /// instead of as multiplication. + std::vector Macros; + /// The maximum number of consecutive empty lines to keep. /// \code /// MaxEmptyLinesToKeep: 1 vs. MaxEmptyLinesToKeep: 0 @@ -4306,7 +4323,8 @@ StatementAttributeLikeMacros == R.StatementAttributeLikeMacros && StatementMacros == R.StatementMacros && TabWidth == R.TabWidth && TypenameMacros == R.TypenameMacros && UseTab == R.UseTab && - WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros; + WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros && + Macros == R.Macros; } std::optional GetLanguageStyle(LanguageKind Language) const; diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -18,6 +18,7 @@ #include "WhitespaceManager.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Format/Format.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/Debug.h" @@ -739,9 +740,15 @@ if (Previous.is(TT_TemplateString) && Previous.opensScope()) CurrentState.NoLineBreak = true; + // Align following lines within parenthesis / brackets if configured. 
+ // For a line of macro parents, the commas that follow the opening parenthesis + // in the line come after the opening parenthesis' children - we want to align + // the comma with the previous token's children instead of the opening + // parenthesis. if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() && Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) && + !(Current.MacroParent && Previous.MacroParent) && (Current.isNot(TT_LineComment) || Previous.is(BK_BracedInit))) { CurrentState.Indent = State.Column + Spaces; CurrentState.IsAligned = true; diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1036,6 +1036,7 @@ IO.mapOptional("UseTab", Style.UseTab); IO.mapOptional("WhitespaceSensitiveMacros", Style.WhitespaceSensitiveMacros); + IO.mapOptional("Macros", Style.Macros); // If AlwaysBreakAfterDefinitionReturnType was specified but // AlwaysBreakAfterReturnType was not, initialize the latter from the diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -373,6 +373,11 @@ /// binary operator. TokenType getType() const { return Type; } void setType(TokenType T) { + // If this token is a macro argument while formatting an unexpanded macro + // call, we do not change its type any more - the type was deduced from + // formatting the expanded macro stream already. 
+ if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg) + return; assert((!TypeIsFinalized || T == Type) && "Please use overwriteFixedType to change a fixed type."); Type = T; diff --git a/clang/lib/Format/Macros.h b/clang/lib/Format/Macros.h --- a/clang/lib/Format/Macros.h +++ b/clang/lib/Format/Macros.h @@ -149,7 +149,7 @@ /// /// After this point, the state of the spelled/expanded stream is "in sync" /// (both at the start of an UnwrappedLine, with no macros open), so the -/// Unexpander can be thrown away and parsing can continue. +/// Reconstructor can be thrown away and parsing can continue. /// /// Given a mapping from the macro name identifier token in the macro call /// to the tokens of the macro call, for example: diff --git a/clang/lib/Format/TokenAnalyzer.h b/clang/lib/Format/TokenAnalyzer.h --- a/clang/lib/Format/TokenAnalyzer.h +++ b/clang/lib/Format/TokenAnalyzer.h @@ -46,7 +46,7 @@ FileID getFileID() const { return ID; } - const SourceManager &getSourceManager() const { return SM; } + SourceManager &getSourceManager() const { return SM; } ArrayRef getCharRanges() const { return CharRanges; } diff --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp --- a/clang/lib/Format/TokenAnalyzer.cpp +++ b/clang/lib/Format/TokenAnalyzer.cpp @@ -104,12 +104,12 @@ IdentifierTable IdentTable(getFormattingLangOpts(Style)); FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(), Env.getFirstStartColumn(), Style, Encoding, Allocator, - IdentTable); ArrayRef Toks(Lex.lex()); SmallVector Tokens(Toks.begin(), Toks.end()); - UnwrappedLineParser Parser(Style, Lex.getKeywords(), - Env.getFirstStartColumn(), Tokens, *this); + UnwrappedLineParser Parser(Env.getSourceManager(), Style, Lex.getKeywords(), + Env.getFirstStartColumn(), Tokens, *this, + Allocator, IdentTable); Parser.parse(); assert(UnwrappedLines.back().empty()); unsigned Penalty = 0; diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h --- 
a/clang/lib/Format/TokenAnnotator.h +++ b/clang/lib/Format/TokenAnnotator.h @@ -65,20 +65,32 @@ // left them in a different state. First->Previous = nullptr; FormatToken *Current = First; + addChildren(Line.Tokens.front(), Current); for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) { + if (Node.Tok->MacroParent) + ContainsMacroCall = true; Current->Next = Node.Tok; Node.Tok->Previous = Current; Current = Current->Next; - Current->Children.clear(); - for (const auto &Child : Node.Children) { - Children.push_back(new AnnotatedLine(Child)); - Current->Children.push_back(Children.back()); - } + addChildren(Node, Current); + // FIXME: if we add children, previous will point to the token before + // the children; changing this requires significant changes across + // clang-format. } Last = Current; Last->Next = nullptr; } + void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) { + Current->Children.clear(); + for (const auto &Child : Node.Children) { + Children.push_back(new AnnotatedLine(Child)); + if (Children.back()->ContainsMacroCall) + ContainsMacroCall = true; + Current->Children.push_back(Children.back()); + } + } + ~AnnotatedLine() { for (AnnotatedLine *Child : Children) delete Child; @@ -149,6 +161,9 @@ bool MightBeFunctionDecl; bool IsMultiVariableDeclStmt; + /// \c True if this line contains a macro call for which an expansion exists. + bool ContainsMacroCall = false; + /// \c True if this line should be formatted, i.e. intersects directly or /// indirectly with one of the input ranges. bool Affected; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2595,6 +2595,13 @@ // Consume operators with higher precedence. parse(Precedence + 1); + // Do not assign fake parenthesis to tokens that are part of an + // unexpanded macro call. 
The line within the macro call contains + // the parenthesis and commas, and we will not find operators within + // that structure. + if (Current && Current->MacroParent) + break; + int CurrentPrecedence = getCurrentPrecedence(); if (Precedence == CurrentPrecedence && Current && @@ -4225,8 +4232,12 @@ Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) { return true; } - if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen)) + if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen) && + // In an unexpanded macro call we only find the parentheses and commas + // in a line; the commas and closing parenthesis do not require a space. + (Left.Children.empty() || !Left.MacroParent)) { return true; + } if (Right.is(tok::comma)) return false; if (Right.is(TT_ObjCBlockLParen)) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "UnwrappedLineFormatter.h" +#include "FormatToken.h" #include "NamespaceEndCommentsFixer.h" #include "WhitespaceManager.h" #include "llvm/Support/Debug.h" @@ -918,9 +919,12 @@ static void markFinalized(FormatToken *Tok) { for (; Tok; Tok = Tok->Next) { - Tok->Finalized = true; - for (AnnotatedLine *Child : Tok->Children) - markFinalized(Child->First); + if (Tok->MacroCtx && Tok->MacroCtx->Role == MR_ExpandedArg) { + Tok->MacroCtx->Role = MR_UnexpandedArg; + Tok->SpacesRequiredBefore = 0; + } else { + Tok->Finalized = true; + } } } @@ -975,15 +979,15 @@ bool formatChildren(LineState &State, bool NewLine, bool DryRun, unsigned &Penalty) { const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); + bool HasLBrace = LBrace && LBrace->is(tok::l_brace) && LBrace->is(BK_Block); FormatToken &Previous = *State.NextToken->Previous; - if (!LBrace || 
LBrace->isNot(tok::l_brace) || LBrace->isNot(BK_Block) || - Previous.Children.size() == 0) { + if (Previous.Children.size() == 0 || (!HasLBrace && !LBrace->MacroParent)) { // The previous token does not open a block. Nothing to do. We don't // assert so that we can simply call this function for all tokens. return true; } - if (NewLine) { + if (NewLine || Previous.MacroParent) { const ParenState &P = State.Stack.back(); int AdditionalIndent = @@ -1349,11 +1353,12 @@ NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); bool FitsIntoOneLine = - TheLine.Last->TotalLength + Indent <= ColumnLimit || - (TheLine.Type == LT_ImportStatement && - (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) || - (Style.isCSharp() && - TheLine.InPPDirective); // don't split #regions in C# + !TheLine.ContainsMacroCall && + (TheLine.Last->TotalLength + Indent <= ColumnLimit || + (TheLine.Type == LT_ImportStatement && + (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) || + (Style.isCSharp() && + TheLine.InPPDirective)); // don't split #regions in C# if (Style.ColumnLimit == 0) { NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) .formatLine(TheLine, NextStartColumn + Indent, diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -15,10 +15,14 @@ #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H +#include "Encoding.h" #include "FormatToken.h" +#include "Macros.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Format/Format.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/Regex.h" #include #include @@ -71,6 +75,11 @@ /// line with the corresponding closing brace. 
size_t MatchingClosingBlockLineIndex = kInvalidIndex; + void resetIndexes() { + MatchingOpeningBlockLineIndex = kInvalidIndex; + MatchingClosingBlockLineIndex = kInvalidIndex; + } + static const size_t kInvalidIndex = -1; unsigned FirstStartColumn = 0; @@ -84,13 +93,17 @@ }; class FormatTokenSource; +class MacroCallReconstructor; class UnwrappedLineParser { public: - UnwrappedLineParser(const FormatStyle &Style, + UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef Tokens, - UnwrappedLineConsumer &Callback); + UnwrappedLineConsumer &Callback, + llvm::SpecificBumpPtrAllocator &Allocator, + IdentifierTable &IdentTable); + ~UnwrappedLineParser(); void parse(); @@ -193,6 +206,7 @@ unsigned parseVerilogHierarchyHeader(); void parseVerilogTable(); void parseVerilogCaseLabel(); + llvm::SmallVector, 1> parseMacroCall(); // Used by addUnwrappedLine to denote whether to keep or remove a level // when resetting the line state. @@ -236,6 +250,10 @@ bool isOnNewLine(const FormatToken &FormatTok); + // Returns whether there is a macro expansion in the line, i.e. a token that + // was expanded from a macro call. + bool containsExpansion(const UnwrappedLine &Line); + // Compute hash of the current preprocessor branch. // This is used to identify the different branches, and thus track if block // open and close in the same branch. @@ -246,6 +264,26 @@ // and use that everywhere in the Parser. std::unique_ptr Line; + // Lines that are created by macro expansion. + // When formatting code containing macro calls, we first format the expanded + // lines to set the token types correctly. Afterwards, we format the + // reconstructed macro calls, re-using the token types determined in the first + // step. + SmallVector ExpandedLines; + + // Map from the macro identifier to a line containing the full unexpanded + // macro call. 
+ llvm::DenseMap> Unexpanded; + + // For recursive macro expansions, trigger reconstruction only on the + // outermost expansion. + bool InExpansion = false; + + // Non-null while we reconstruct a macro call. + // For reconstruction, we feed the expanded lines into the reconstructor + // until it is finished. + std::unique_ptr Reconstruct; + // Comments are sorted into unwrapped lines by whether they are in the same // line as the previous token, or not. If not, they belong to the next token. // Since the next token might already be in a new unwrapped line, we need to @@ -345,13 +383,17 @@ // does not start at the beginning of the file. unsigned FirstStartColumn; + MacroExpander Macros; + friend class ScopedLineState; friend class CompoundStatementIndenter; }; struct UnwrappedLineNode { UnwrappedLineNode() : Tok(nullptr) {} - UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {} + UnwrappedLineNode(FormatToken *Tok, + llvm::ArrayRef Children = {}) + : Tok(Tok), Children(Children.begin(), Children.end()) {} FormatToken *Tok; SmallVector Children; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -14,11 +14,15 @@ #include "UnwrappedLineParser.h" #include "FormatToken.h" +#include "FormatTokenLexer.h" #include "FormatTokenSource.h" +#include "Macros.h" #include "TokenAnnotator.h" #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_os_ostream.h" #include "llvm/Support/raw_ostream.h" #include @@ -143,11 +147,12 @@ unsigned OldLineLevel; }; -UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, - const AdditionalKeywords &Keywords, - unsigned FirstStartColumn, - ArrayRef Tokens, - UnwrappedLineConsumer &Callback) +UnwrappedLineParser::UnwrappedLineParser( + SourceManager &SourceMgr, const FormatStyle &Style, + 
const AdditionalKeywords &Keywords, unsigned FirstStartColumn, + ArrayRef Tokens, UnwrappedLineConsumer &Callback, + llvm::SpecificBumpPtrAllocator &Allocator, + IdentifierTable &IdentTable) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), CurrentLines(&Lines), Style(Style), Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), @@ -155,7 +160,10 @@ IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None ? IG_Rejected : IG_Inited), - IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} + IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), + Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {} + +UnwrappedLineParser::~UnwrappedLineParser() {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; @@ -173,6 +181,14 @@ NestedTooDeep.clear(); PPStack.clear(); Line->FirstStartColumn = FirstStartColumn; + + if (!Unexpanded.empty()) + for (FormatToken *Token : AllTokens) + Token->MacroCtx.reset(); + ExpandedLines.clear(); + Unexpanded.clear(); + InExpansion = false; + Reconstruct.reset(); } void UnwrappedLineParser::parse() { @@ -196,12 +212,24 @@ } // Create line with eof token. 
+ assert(FormatTok->is(tok::eof)); pushToken(FormatTok); addUnwrappedLine(); - for (const UnwrappedLine &Line : Lines) - Callback.consumeUnwrappedLine(Line); + if (!ExpandedLines.empty()) { + LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); + for (const auto &Line : ExpandedLines) { + LLVM_DEBUG(printDebugInfo(Line)); + Callback.consumeUnwrappedLine(Line); + } + Callback.finishRun(); + } + LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); + for (const UnwrappedLine &Line : Lines) { + LLVM_DEBUG(printDebugInfo(Line)); + Callback.consumeUnwrappedLine(Line); + } Callback.finishRun(); Lines.clear(); while (!PPLevelBranchIndex.empty() && @@ -4152,12 +4180,25 @@ Line->Level = OrigLevel; } +bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) { + for (const auto &N : Line.Tokens) { + if (N.Tok->MacroCtx) + return true; + for (const UnwrappedLine &Child : N.Children) + if (containsExpansion(Child)) + return true; + } + return false; +} + void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { if (Line->Tokens.empty()) return; LLVM_DEBUG({ - if (CurrentLines == &Lines) + if (CurrentLines == &Lines) { + llvm::dbgs() << "Adding unwrapped line:\n"; printDebugInfo(*Line); + } }); // If this line closes a block when in Whitesmiths mode, remember that @@ -4168,7 +4209,44 @@ Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; - CurrentLines->push_back(std::move(*Line)); + // If the current line was expanded from a macro call, we use it to + // reconstruct an unwrapped line from the structure of the expanded unwrapped + // line and the unexpanded token stream. 
+ if (CurrentLines == &Lines && !InExpansion && containsExpansion(*Line)) { + if (!Reconstruct) { + Reconstruct = + std::make_unique(Line->Level, Unexpanded); + } + Reconstruct->addLine(*Line); + if (Reconstruct->finished()) { + UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); + assert(!Reconstructed.Tokens.empty() && + "Reconstructed must at least contain the macro identifier."); + LLVM_DEBUG({ + if (CurrentLines == &Lines) { + llvm::dbgs() << "Adding unexpanded line:\n"; + printDebugInfo(Reconstructed); + } + }); + CurrentLines->push_back(std::move(Reconstructed)); + Reconstruct.reset(); + } + // FIXME: We format the expanded lines in an extra step that does not give + // the formatter all unwrapped lines, thus the indexes are invalid; to allow + // all features during expanded line formatting, recalculate the indexes + // based on the available expanded lines where possible. + Line->resetIndexes(); + + // While the reconstructed unexpanded lines are stored in the normal + // flow of lines, the expanded lines are stored on the side to be analyzed + // in an extra step. + ExpandedLines.push_back(std::move(*Line)); + } else { + // At the top level we only get here when no unexpansion is going on, or + // when conditional formatting led to unfinished macro reconstructions. + assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); + CurrentLines->push_back(std::move(*Line)); + } Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; Line->FirstStartColumn = 0; @@ -4470,6 +4548,47 @@ continue; } + if (FormatTok->is(tok::identifier) && + Macros.defined(FormatTok->TokenText) && + // FIXME: Allow expanding macros in preprocessor directives. + !Line->InPPDirective) { + FormatToken *ID = FormatTok; + + // To correctly parse the code, we need to replace the tokens of the macro + // call with its expansion. 
+ auto PreCall = std::move(Line); + Line.reset(new UnwrappedLine); + bool OldInExpansion = InExpansion; + InExpansion = true; + // We parse the macro call into a new line. + auto Args = parseMacroCall(); + InExpansion = OldInExpansion; + assert(Line->Tokens.front().Tok == ID); + // And remember the unexpanded macro call tokens. + Unexpanded[ID] = std::move(Line); + + // Next, we insert the expanded tokens in the token stream at the current + // position, and continue parsing. + Line = std::move(PreCall); + SmallVector New = Macros.expand(ID, Args); + if (!New.empty()) + FormatTok = Tokens->insertTokens(New); + + LLVM_DEBUG({ + llvm::dbgs() << "Call: " << ID->TokenText << "("; + for (const auto &Arg : Args) + for (const auto &T : Arg) + llvm::dbgs() << T->TokenText << " "; + llvm::dbgs() << ")\n"; + }); + LLVM_DEBUG({ + llvm::dbgs() << "Expanded: "; + for (const auto &T : New) + llvm::dbgs() << T->TokenText << " "; + llvm::dbgs() << "\n"; + }); + } + if (!FormatTok->is(tok::comment)) { distributeComments(Comments, FormatTok); Comments.clear(); @@ -4483,6 +4602,65 @@ Comments.clear(); } +namespace { +template +void pushTokens(Iterator Begin, Iterator End, + llvm::SmallVectorImpl &Into) { + for (auto I = Begin; I != End; ++I) { + Into.push_back(I->Tok); + for (const auto &Child : I->Children) + pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); + } +} +} // namespace + +llvm::SmallVector, 1> +UnwrappedLineParser::parseMacroCall() { + llvm::SmallVector, 1> Args; + assert(Line->Tokens.empty()); + nextToken(); + if (FormatTok->isNot(tok::l_paren)) + return Args; + nextToken(); + auto ArgStart = std::prev(Line->Tokens.end()); + + int Parens = 0; + do { + switch (FormatTok->Tok.getKind()) { + case tok::l_paren: + ++Parens; + nextToken(); + break; + case tok::r_paren: { + if (Parens > 0) { + --Parens; + nextToken(); + break; + } + Args.push_back({}); + pushTokens(std::next(ArgStart), Line->Tokens.end(), Args.back()); + nextToken(); + return Args; + } + case 
tok::comma: { + if (Parens > 0) { + nextToken(); + break; + } + Args.push_back({}); + pushTokens(std::next(ArgStart), Line->Tokens.end(), Args.back()); + nextToken(); + ArgStart = std::prev(Line->Tokens.end()); + break; + } + default: + nextToken(); + break; + } + } while (!eof()); + return {}; +} + void UnwrappedLineParser::pushToken(FormatToken *Tok) { Line->Tokens.push_back(UnwrappedLineNode(Tok)); if (MustBreakBeforeNextToken) { diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -49,7 +49,7 @@ unsigned Spaces, unsigned StartOfTokenColumn, bool IsAligned, bool InPPDirective) { - if (Tok.Finalized) + if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg)) return; Tok.setDecision((Newlines > 0) ? FD_Break : FD_Continue); Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange, @@ -60,7 +60,7 @@ void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, bool InPPDirective) { - if (Tok.Finalized) + if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg)) return; Changes.push_back(Change(Tok, /*CreateReplacement=*/false, Tok.WhitespaceRange, /*Spaces=*/0, @@ -84,7 +84,7 @@ const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces) { - if (Tok.Finalized) + if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg)) return; SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); Changes.push_back( diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -66,7 +66,8 @@ void _verifyFormat(const char *File, int Line, llvm::StringRef Expected, llvm::StringRef Code, - const FormatStyle &Style = 
getLLVMStyle()) { + const FormatStyle &Style = getLLVMStyle(), + bool MessUp = true) { ScopedTrace t(File, Line, ::testing::Message() << Code.str()); EXPECT_EQ(Expected.str(), format(Expected, Style)) << "Expected code is not stable"; @@ -76,20 +77,24 @@ // needs to be checked for Objective-C++ as well. FormatStyle ObjCStyle = Style; ObjCStyle.Language = FormatStyle::LK_ObjC; - EXPECT_EQ(Expected.str(), format(test::messUp(Code), ObjCStyle)); + EXPECT_EQ(Expected.str(), + format(MessUp ? test::messUp(Code) : Code, ObjCStyle)); } } void _verifyFormat(const char *File, int Line, llvm::StringRef Code, - const FormatStyle &Style = getLLVMStyle()) { - _verifyFormat(File, Line, Code, test::messUp(Code), Style); + const FormatStyle &Style = getLLVMStyle(), + bool MessUp = true) { + _verifyFormat(File, Line, Code, MessUp ? test::messUp(Code) : Code, Style, + MessUp); } void _verifyIncompleteFormat(const char *File, int Line, llvm::StringRef Code, - const FormatStyle &Style = getLLVMStyle()) { + const FormatStyle &Style = getLLVMStyle(), + bool MessUp = true) { ScopedTrace t(File, Line, ::testing::Message() << Code.str()); - EXPECT_EQ(Code.str(), - format(test::messUp(Code), Style, SC_ExpectIncomplete)); + EXPECT_EQ(Code.str(), format(MessUp ? 
test::messUp(Code) : Code, Style, + SC_ExpectIncomplete)); } void _verifyIndependentOfContext(const char *File, int Line, @@ -22568,6 +22573,189 @@ "aaaallvm::outs()\n <<"); } +TEST_F(FormatTest, UnexpandConfiguredMacros) { + FormatStyle Style = getLLVMStyle(); + Style.Macros.push_back("CLASS=class C {"); + Style.Macros.push_back("SEMI=;"); + Style.Macros.push_back("STMT=f();"); + Style.Macros.push_back("ID(x)=x"); + Style.Macros.push_back("ID3(x, y, z)=x y z"); + Style.Macros.push_back("CALL(x)=f([] { x })"); + Style.Macros.push_back("ASSIGN_OR_RETURN(a, b, c)=a = (b) || (c)"); + + verifyFormat("ID(nested(a(b, c), d))", Style); + verifyFormat("CLASS\n" + " a *b;\n" + "};", + Style); + verifyFormat("SEMI\n" + "SEMI\n" + "SEMI", + Style); + verifyFormat("STMT\n" + "STMT\n" + "STMT", + Style); + verifyFormat("void f() { ID(a *b); }", Style); + verifyFormat(R"(ID( + { ID(a *b); }); +)", + Style); + verifyIncompleteFormat(R"(ID3({, ID(a *b), + ; + }); +)", + Style); + + verifyFormat("ID(CALL(CALL(return a * b;)));", Style); + + verifyFormat("ASSIGN_OR_RETURN(MySomewhatLongType *variable,\n" + " MySomewhatLongFunction(SomethingElse()));\n", + Style); + + verifyFormat(R"( +#define MACRO(a, b) ID(a + b) +)", + Style); + EXPECT_EQ(R"( +int a; +int b; +int c; +int d; +int e; +int f; +ID( + namespace foo { + int a; + } +) // namespace k +)", + format(R"( +int a; +int b; +int c; +int d; +int e; +int f; +ID(namespace foo { int a; }) // namespace k +)", + Style)); + verifyFormat(R"(ID( + // + ({ ; })) +)", + Style); + + Style.ColumnLimit = 35; + // FIXME: Arbitrary formatting of macros where the end of the logical + // line is in the middle of a macro call are not working yet. 
+ verifyFormat(R"(ID( + void f(); + void) +ID(g) ID(()) ID( + ; + void g();) +)", + Style); + + Style.ColumnLimit = 10; + verifyFormat("STMT\n" + "STMT\n" + "STMT", + Style); + + EXPECT_EQ(R"( +ID(CALL(CALL( + a *b))); +)", + format(R"( +ID(CALL(CALL(a * b))); +)", + Style)); + + // FIXME: If we want to support unbalanced braces or parens from macro + // expansions we need to re-think how we propagate errors in + // TokenAnnotator::parseLine; for investigation, switching the inner loop of + // TokenAnnotator::parseLine to return LT_Other instead of LT_Invalid in case + // of !consumeToken() changes the formatting of the test below and makes it + // believe it has a fully correct formatting. + EXPECT_EQ(R"( +ID3( + { + CLASS + a *b; + }; + }, + ID(x *y); + , + STMT + STMT + STMT) +void f(); +)", + format(R"( +ID3({CLASS a*b; };}, ID(x*y);, STMT STMT STMT) +void f(); +)", + Style)); + + verifyFormat("ID(a(\n" + "#ifdef A\n" + " b, c\n" + "#else\n" + " d(e)\n" + "#endif\n" + " ))", + Style); + Style.ColumnLimit = 80; + verifyFormat(R"(ASSIGN_OR_RETURN( + // Comment + a b, c); +)", + Style); + Style.ColumnLimit = 30; + verifyFormat(R"(ASSIGN_OR_RETURN( + // Comment + // + a b, + xxxxxxxxxxxx( + yyyyyyyyyyyyyyyyy, + zzzzzzzzzzzzzzzzzz), + f([]() { + a(); + b(); + })); +)", + Style); + verifyFormat(R"(int a = []() { + ID( + x; + y; + z;) + ; +}(); +)", + Style); + verifyIncompleteFormat(R"(ASSIGN_OR_RETURN(( +==== +#)) +})", + Style, /*MessUp=*/false); + verifyIncompleteFormat(R"(ASSIGN_OR_RETURN( +} +( +==== +#), +a))", + Style, /*MessUp=*/false); + verifyFormat(R"(ASSIGN_OR_RETURN(a +// +==== +# + <))", + Style, /*MessUp=*/false); +} + TEST_F(FormatTest, HandleUnbalancedImplicitBracesAcrossPPBranches) { std::string code = "#if A\n" "#if B\n" diff --git a/clang/unittests/Format/TestLexer.h b/clang/unittests/Format/TestLexer.h --- a/clang/unittests/Format/TestLexer.h +++ b/clang/unittests/Format/TestLexer.h @@ -72,7 +72,8 @@ TokenList annotate(llvm::StringRef Code) { 
FormatTokenLexer Lex = getNewLexer(Code); auto Tokens = Lex.lex(); - UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this); + UnwrappedLineParser Parser(SourceMgr.get(), Style, Lex.getKeywords(), 0, + Tokens, *this, Allocator, IdentTable); Parser.parse(); TokenAnnotator Annotator(Style, Lex.getKeywords()); for (auto &Line : UnwrappedLines) {