diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt --- a/clang/lib/Format/CMakeLists.txt +++ b/clang/lib/Format/CMakeLists.txt @@ -8,6 +8,7 @@ Format.cpp FormatToken.cpp FormatTokenLexer.cpp + MacroCallReconstructor.cpp MacroExpander.cpp NamespaceEndCommentsFixer.cpp QualifierAlignmentFixer.cpp diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -497,6 +497,15 @@ // in a configured macro expansion. llvm::Optional MacroCtx; + /// When macro expansion introduces nodes with children, those are marked as + /// \c MacroParent. + /// FIXME: The formatting code currently hard-codes the assumption that + /// child nodes are introduced by blocks following an opening brace. + /// This is deeply baked into the code and disentangling this will require + /// signficant refactorings. \c MacroParent allows us to special-case the + /// cases in which we treat parents as block-openers for now. + bool MacroParent = false; + bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } bool is(TokenType TT) const { return getType() == TT; } bool is(const IdentifierInfo *II) const { diff --git a/clang/lib/Format/MacroCallReconstructor.cpp b/clang/lib/Format/MacroCallReconstructor.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Format/MacroCallReconstructor.cpp @@ -0,0 +1,569 @@ +//===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the implementation of MacroCallReconstructor, which fits +/// an reconstructed macro call to a parsed set of UnwrappedLines. +/// +//===----------------------------------------------------------------------===// + +#include "Macros.h" + +#include "UnwrappedLineParser.h" +#include "clang/Basic/TokenKinds.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/Debug.h" +#include + +#define DEBUG_TYPE "format-reconstruct" + +namespace clang { +namespace format { + +// Call \p Call for each token in the unwrapped line given, passing +// the token, its parent and whether it is the first token in the line. +template +void forEachToken(const UnwrappedLine &Line, const T &Call, + FormatToken *Parent = nullptr) { + bool First = true; + for (const auto &N : Line.Tokens) { + Call(N.Tok, Parent, First); + First = false; + for (const auto &Child : N.Children) { + forEachToken(Child, Call, N.Tok); + } + } +} + +MacroCallReconstructor::MacroCallReconstructor( + unsigned Level, + const llvm::DenseMap> + &ActiveExpansions) + : Level(Level), IdToReconstructed(ActiveExpansions) { + Result.Tokens.push_back(std::make_unique()); + ActiveReconstructedLines.push_back(&Result); +} + +void MacroCallReconstructor::addLine(const UnwrappedLine &Line) { + assert(State != Finalized); + LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n"); + forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) { + add(Token, Parent, First); + }); + assert(InProgress || finished()); +} + +UnwrappedLine MacroCallReconstructor::takeResult() && { + finalize(); + assert(Result.Tokens.size() == 1 && Result.Tokens.front()->Children.size() == 1); + UnwrappedLine Final = + createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level); + assert(!Final.Tokens.empty()); + return Final; +} + +// Reconstruct the position of the next \p Token, given its parent \p +// ExpandedParent in the incoming unwrapped line. \p First specifies whether it +// is the first token in a given unwrapped line. +void MacroCallReconstructor::add(FormatToken *Token, + FormatToken *ExpandedParent, bool First) { + LLVM_DEBUG( + llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: " + << (ExpandedParent ? ExpandedParent->TokenText : "") + << ", First: " << First << "\n"); + // In order to be able to find the correct parent in the reconstructed token + // stream, we need to continue the last open reconstruction until we find the + // given token if it is part of the reconstructed token stream. + // + // Note that hidden tokens can be part of the reconstructed stream in nested + // macro calls. + // For example, given + // #define C(x, y) x y + // #define B(x) {x} + // And the call: + // C(a, B(b)) + // The outer macro call will be C(a, {b}), and the hidden token '}' can be + // found in the reconstructed token stream of that expansion level. + // In the expanded token stream + // a {b} + // 'b' is a child of '{'. We need to continue the open expansion of the ',' + // in the call of 'C' in order to correctly set the ',' as the parent of '{', + // so we later set the spelled token 'b' as a child of the ','. + if (!ActiveExpansions.empty() && Token->MacroCtx && + (Token->MacroCtx->Role != MR_Hidden || + ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) { + if (bool PassedMacroComma = reconstructActiveCallUntil(Token)) + First = true; + } + + prepareParent(ExpandedParent, First); + + if (Token->MacroCtx) { + // If this token was generated by a macro call, add the reconstructed + // equivalent of the token. + reconstruct(Token); + } else { + // Otherwise, we add it to the current line. + appendToken(Token); + } +} + +// Adjusts the stack of active reconstructed lines so we're ready to push +// tokens. The tokens to be pushed are children of ExpandedParent in the +// expanded code. +// +// This may entail: +// - creating a new line, if the parent is on the active line +// - popping active lines, if the parent is further up the stack +// +// Postcondition: +// ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its +// reconstructed replacement token as a parent (when possible) - that is, the +// last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2] +// is the parent of ActiveReconstructedLines.back() in the reconstructed +// unwrapped line. +void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent, + bool NewLine) { + LLVM_DEBUG({ + llvm::dbgs() << "ParentMap:\n"; + debugParentMap(); + }); + // We want to find the parent in the new unwrapped line, where the expanded + // parent might have been replaced during reconstruction. + FormatToken *Parent = getParentInResult(ExpandedParent); + LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: " + << (Parent ? Parent->TokenText : "") << "\n"); + + FormatToken *OpenMacroParent = nullptr; + if (!MacroCallStructure.empty()) { + // Inside a macro expansion, it is possible to lose track of the correct + // parent - either because it is already popped, for example because it was + // in a different macro argument (e.g. M({, })), or when we work on invalid + // code. + // Thus, we use the innermost macro call's parent as the parent at which + // we stop; this allows us to stay within the macro expansion and keeps + // any problems confined to the extent of the macro call. + OpenMacroParent = + getParentInResult(MacroCallStructure.back().MacroCallLParen); + LLVM_DEBUG(llvm::dbgs() + << "MacroCallLParen: " + << MacroCallStructure.back().MacroCallLParen->TokenText + << ", OpenMacroParent: " + << (OpenMacroParent ? OpenMacroParent->TokenText : "") + << "\n"); + } + if (NewLine || + (!ActiveReconstructedLines.back()->Tokens.empty() && + Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) { + // If we are at the first token in a new line, we want to also + // create a new line in the resulting reconstructed unwrapped line. + while (ActiveReconstructedLines.back()->Tokens.empty() || + (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok && + ActiveReconstructedLines.back()->Tokens.back()->Tok != + OpenMacroParent)) { + ActiveReconstructedLines.pop_back(); + assert(!ActiveReconstructedLines.empty()); + } + assert(!ActiveReconstructedLines.empty()); + ActiveReconstructedLines.back()->Tokens.back()->Children.push_back( + std::make_unique()); + ActiveReconstructedLines.push_back( + &*ActiveReconstructedLines.back()->Tokens.back()->Children.back()); + } else if (parentLine().Tokens.back()->Tok != Parent) { + // If we're not the first token in a new line, pop lines until we find + // the child of \c Parent in the stack. + while (Parent != parentLine().Tokens.back()->Tok && + parentLine().Tokens.back()->Tok && + parentLine().Tokens.back()->Tok != OpenMacroParent) { + ActiveReconstructedLines.pop_back(); + assert(!ActiveReconstructedLines.empty()); + } + } + assert(!ActiveReconstructedLines.empty()); +} + +// For a given \p Parent in the incoming expanded token stream, find the +// corresponding parent in the output. +FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) { + FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent); + if (!Mapped) + return Parent; + for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent)) { + Parent = Mapped; + } + // If we use a different token than the parent in the expanded token stream + // as parent, mark it as a special parent, so the formatting code knows it + // needs to have its children formatted. + Parent->MacroParent = true; + return Parent; +} + +// Reconstruct a \p Token that was expanded from a macro call. +void MacroCallReconstructor::reconstruct(FormatToken *Token) { + assert(Token->MacroCtx); + // A single token can be the only result of a macro call: + // Given: #define ID(x, y) ; + // And the call: ID(, ) + // ';' in the expanded stream will reconstruct all of ID(, ). + if (Token->MacroCtx->StartOfExpansion) { + startReconstruction(Token); + // If the order of tokens in the expanded token stream is not the + // same as the order of tokens in the reconstructed stream, we need + // to reconstruct tokens that arrive later in the stream. + if (Token->MacroCtx->Role != MR_Hidden) { + reconstructActiveCallUntil(Token); + } + } + assert(!ActiveExpansions.empty()); + if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) { + assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size()); + if (Token->MacroCtx->Role != MR_Hidden) { + // The current token in the reconstructed token stream must be the token + // we're looking for - we either arrive here after startReconstruction, + // which initiates the stream to the first token, or after + // continueReconstructionUntil skipped until the expected token in the + // reconstructed stream at the start of add(...). + assert(ActiveExpansions.back().SpelledI->Tok == Token); + processNextReconstructed(); + } else if (!currentLine()->Tokens.empty()) { + // Map all hidden tokens to the last visible token in the output. + // If the hidden token is a parent, we'll use the last visible + // token as the parent of the hidden token's children. + SpelledParentToReconstructedParent[Token] = + currentLine()->Tokens.back()->Tok; + } else { + for (auto I = ActiveReconstructedLines.rbegin(), + E = ActiveReconstructedLines.rend(); + I != E; ++I) { + if (!(*I)->Tokens.empty()) { + SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok; + break; + } + } + } + } + if (Token->MacroCtx->EndOfExpansion) + endReconstruction(Token); +} + +// Given a \p Token that starts an expansion, reconstruct the beginning of the +// macro call. +// For example, given: #define ID(x) x +// And the call: ID(int a) +// Reconstructs: ID( +void MacroCallReconstructor::startReconstruction(FormatToken *Token) { + assert(Token->MacroCtx); + assert(!Token->MacroCtx->ExpandedFrom.empty()); + assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size()); +#ifndef NDEBUG + // Check that the token's reconstruction stack matches our current + // reconstruction stack. + for (size_t I = 0; I < ActiveExpansions.size(); ++I) { + assert(ActiveExpansions[I].ID == + Token->MacroCtx + ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]); + } +#endif + // Start reconstruction for all calls for which this token is the first token + // generated by the call. + // Note that the token's expanded from stack is inside-to-outside, and the + // expansions for which this token is not the first are the outermost ones. + ArrayRef StartedMacros = + makeArrayRef(Token->MacroCtx->ExpandedFrom) + .drop_back(ActiveExpansions.size()); + assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion); + // We reconstruct macro calls outside-to-inside. + for (FormatToken *ID : llvm::reverse(StartedMacros)) { + // We found a macro call to be reconstructed; the next time our + // reconstruction stack is empty we know we finished an reconstruction. +#ifndef NDEBUG + State = InProgress; +#endif + // Put the reconstructed macro call's token into our reconstruction stack. + auto IU = IdToReconstructed.find(ID); + assert(IU != IdToReconstructed.end()); + ActiveExpansions.push_back( + {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()}); + // Process the macro call's identifier. + processNextReconstructed(); + if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE) + continue; + if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) { + // Process the optional opening parenthesis. + processNextReconstructed(); + } + } +} + +// Add all tokens in the reconstruction stream to the output until we find the +// given \p Token. +bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) { + assert(!ActiveExpansions.empty()); + bool PassedMacroComma = false; + // FIXME: If Token was already expanded earlier, due to + // a change in order, we will not find it, but need to + // skip it. + while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE && + ActiveExpansions.back().SpelledI->Tok != Token) { + PassedMacroComma = processNextReconstructed() || PassedMacroComma; + } + return PassedMacroComma; +} + +// End all reconstructions for which \p Token is the final token. +void MacroCallReconstructor::endReconstruction(FormatToken *Token) { + assert(Token->MacroCtx && + (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion)); + for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) { +#ifndef NDEBUG + // Check all remaining tokens but the final closing parenthesis and optional + // trailing comment were already reconstructed at an inner expansion level. + for (auto T = ActiveExpansions.back().SpelledI; + T != ActiveExpansions.back().SpelledE; ++T) { + FormatToken *Token = T->Tok; + bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE || + std::next(T)->Tok->isTrailingComment()) && + !Token->MacroCtx && Token->is(tok::r_paren); + bool TrailingComment = Token->isTrailingComment(); + bool PreviousLevel = + Token->MacroCtx && + (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()); + if (!ClosingParen && !TrailingComment && !PreviousLevel) { + llvm::dbgs() << "At token: " << Token->TokenText << "\n"; + } + // In addition to the following cases, we can also run into this + // when a macro call had more arguments than expected; in that case, + // the comma and the remaining tokens in the macro call will potentially + // end up in the line when we finish the expansion. + // FIXME: Add the information which arguments are unused, and assert + // one of the cases below plus reconstructed macro argument tokens. + // assert(ClosingParen || TrailingComment || PreviousLevel); + } +#endif + // Handle the remaining open tokens: + // - expand the closing parenthesis, if it exists, including an optional + // trailing comment + // - handle tokens that were already reconstructed at an inner expansion + // level + // - handle tokens when a macro call had more than the expected number of + // arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end + // up with the sequence ", b, c)" being open at the end of the + // reconstruction; we want to gracefully handle that case + // + // FIXME: See the above debug-check for what we will need to do to be + // able to assert this. + for (auto T = ActiveExpansions.back().SpelledI; + T != ActiveExpansions.back().SpelledE; ++T) { + processNextReconstructed(); + } + ActiveExpansions.pop_back(); + } +} + +void MacroCallReconstructor::debugParentMap() const { + llvm::DenseSet Values; + for (const auto &P : SpelledParentToReconstructedParent) + Values.insert(P.second); + + for (const auto &P : SpelledParentToReconstructedParent) { + if (Values.contains(P.first)) + continue; + llvm::dbgs() << (P.first ? P.first->TokenText : ""); + for (auto I = SpelledParentToReconstructedParent.find(P.first), + E = SpelledParentToReconstructedParent.end(); + I != E; I = SpelledParentToReconstructedParent.find(I->second)) { + llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : ""); + } + llvm::dbgs() << "\n"; + } +} + +// If visible, add the next token of the reconstructed token sequence to the +// output. Returns whether reconstruction passed a comma that is part of a +// macro call. +bool MacroCallReconstructor::processNextReconstructed() { + FormatToken *Token = ActiveExpansions.back().SpelledI->Tok; + ++ActiveExpansions.back().SpelledI; + if (Token->MacroCtx) { + // Skip tokens that are not part of the macro call. + if (Token->MacroCtx->Role == MR_Hidden) { + return false; + } + // Skip tokens we already expanded during an inner reconstruction. + // For example, given: #define ID(x) {x} + // And the call: ID(ID(f)) + // We get two reconstructions: + // ID(f) -> {f} + // ID({f}) -> {{f}} + // We reconstruct f during the first reconstruction, and skip it during the + // second reconstruction. + if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()) { + return false; + } + } + // Tokens that do not have a macro context are tokens in that are part of the + // macro call that have not taken part in expansion. + if (!Token->MacroCtx) { + // Put the parentheses and commas of a macro call into the same line; + // if the arguments produce new unwrapped lines, they will become children + // of the corresponding opening parenthesis or comma tokens in the + // reconstructed call. + if (Token->is(tok::l_paren)) { + MacroCallStructure.push_back(MacroCallState( + currentLine(), parentLine().Tokens.back()->Tok, Token)); + // All tokens that are children of the previous line's last token in the + // reconstructed token stream will now be children of the l_paren token. + // For example, for the line containing the macro calls: + // auto x = ID({ID(2)}); + // We will build up a map -> ( -> ( with the first and second + // l_paren of the macro call respectively. New lines that come in with a + // parent will then become children of the l_paren token of the + // currently innermost macro call. + SpelledParentToReconstructedParent[MacroCallStructure.back() + .ParentLastToken] = Token; + appendToken(Token); + prepareParent(Token, /*NewLine=*/true); + Token->MacroParent = true; + return false; + } + if (!MacroCallStructure.empty()) { + if (Token->is(tok::comma)) { + // Make new lines inside the next argument children of the comma token. + SpelledParentToReconstructedParent + [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token; + Token->MacroParent = true; + appendToken(Token, MacroCallStructure.back().Line); + prepareParent(Token, /*NewLine=*/true); + return true; + } + if (Token->is(tok::r_paren)) { + appendToken(Token, MacroCallStructure.back().Line); + SpelledParentToReconstructedParent.erase( + MacroCallStructure.back().ParentLastToken); + MacroCallStructure.pop_back(); + return false; + } + } + } + // Note that any tokens that are tagged with MR_None have been passed as + // arguments to the macro that have not been expanded, for example: + // Given: #define ID(X) x + // When calling: ID(a, b) + // 'b' will be part of the reconstructed token stream, but tagged MR_None. + // Given that erroring out in this case would be disruptive, we continue + // pushing the (unformatted) token. + // FIXME: This can lead to unfortunate formatting decisions - give the user + // a hint that their macro definition is broken. + appendToken(Token); + return false; +} + +void MacroCallReconstructor::finalize() { +#ifndef NDEBUG + assert(State != Finalized && finished()); + State = Finalized; +#endif + + // We created corresponding unwrapped lines for each incoming line as children + // the the toplevel null token. + assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty()); + LLVM_DEBUG({ + llvm::dbgs() << "Finalizing reconstructed lines:\n"; + debug(Result, 0); + }); + + // The first line becomes the top level line in the resulting unwrapped line. + LineNode &Top = *Result.Tokens.front(); + auto *I = Top.Children.begin(); + // Every subsequent line will become a child of the last token in the previous + // line, which is the token prior to the first token in the line. + LineNode *Last = (*I)->Tokens.back().get(); + ++I; + for (auto *E = Top.Children.end(); I != E; ++I) { + assert(Last->Children.empty()); + Last->Children.push_back(std::move(*I)); + + // Mark the previous line's last token as generated by a macro expansion + // so the formatting algorithm can take that into account. + Last->Tok->MacroParent = true; + + Last = Last->Children.back()->Tokens.back().get(); + } + Top.Children.resize(1); +} + +void MacroCallReconstructor::appendToken(FormatToken *Token, Line *L) { + L = L ? L : currentLine(); + LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n"); + L->Tokens.push_back(std::make_unique(Token)); +} + +UnwrappedLine MacroCallReconstructor::createUnwrappedLine(const Line &Line, + int Level) { + UnwrappedLine Result; + Result.Level = Level; + for (const auto &N : Line.Tokens) { + Result.Tokens.push_back(N->Tok); + UnwrappedLineNode &Current = Result.Tokens.back(); + for (const auto &Child : N->Children) { + if (Child->Tokens.empty()) + continue; + Current.Children.push_back(createUnwrappedLine(*Child, Level + 1)); + } + if (Current.Children.size() == 1 && + Current.Tok->isOneOf(tok::l_paren, tok::comma)) { + Result.Tokens.splice(Result.Tokens.end(), + Current.Children.front().Tokens); + Current.Children.clear(); + } + } + return Result; +} + +void MacroCallReconstructor::debug(const Line &Line, int Level) { + for (int i = 0; i < Level; ++i) + llvm::dbgs() << " "; + for (const auto &N : Line.Tokens) { + if (!N) + continue; + if (N->Tok) + llvm::dbgs() << N->Tok->TokenText << " "; + for (const auto &Child : N->Children) { + llvm::dbgs() << "\n"; + debug(*Child, Level + 1); + for (int i = 0; i < Level; ++i) + llvm::dbgs() << " "; + } + } + llvm::dbgs() << "\n"; +} + +MacroCallReconstructor::Line &MacroCallReconstructor::parentLine() { + return **std::prev(std::prev(ActiveReconstructedLines.end())); +} + +MacroCallReconstructor::Line *MacroCallReconstructor::currentLine() { + return ActiveReconstructedLines.back(); +} + +MacroCallReconstructor::MacroCallState::MacroCallState( + MacroCallReconstructor::Line *Line, FormatToken *ParentLastToken, + FormatToken *MacroCallLParen) + : Line(Line), ParentLastToken(ParentLastToken), + MacroCallLParen(MacroCallLParen) { + LLVM_DEBUG( + llvm::dbgs() << "ParentLastToken: " + << (ParentLastToken ? ParentLastToken->TokenText : "") + << "\n"); + + assert(MacroCallLParen->is(tok::l_paren)); +} + +} // namespace format +} // namespace clang diff --git a/clang/lib/Format/Macros.h b/clang/lib/Format/Macros.h --- a/clang/lib/Format/Macros.h +++ b/clang/lib/Format/Macros.h @@ -1,4 +1,4 @@ -//===--- MacroExpander.h - Format C++ code ----------------------*- C++ -*-===// +//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -22,40 +22,38 @@ /// spelled token streams into expanded token streams when it encounters a /// macro call. The UnwrappedLineParser continues to parse UnwrappedLines /// from the expanded token stream. -/// After the expanded unwrapped lines are parsed, the MacroUnexpander matches -/// the spelled token stream into unwrapped lines that best resemble the -/// structure of the expanded unwrapped lines. +/// After the expanded unwrapped lines are parsed, the MacroCallReconstructor +/// matches the spelled token stream into unwrapped lines that best resemble the +/// structure of the expanded unwrapped lines. These reconstructed unwrapped +/// lines are aliasing the tokens in the expanded token stream, so that token +/// annotations will be reused when formatting the spelled macro calls. /// -/// When formatting, clang-format formats the expanded unwrapped lines first, -/// determining the token types. Next, it formats the spelled unwrapped lines, -/// keeping the token types fixed, while allowing other formatting decisions -/// to change. +/// When formatting, clang-format annotates and formats the expanded unwrapped +/// lines first, determining the token types. Next, it formats the spelled +/// unwrapped lines, keeping the token types fixed, while allowing other +/// formatting decisions to change. /// //===----------------------------------------------------------------------===// #ifndef CLANG_LIB_FORMAT_MACROS_H #define CLANG_LIB_FORMAT_MACROS_H +#include +#include #include -#include #include -#include "Encoding.h" #include "FormatToken.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -namespace llvm { -class MemoryBuffer; -} // namespace llvm - namespace clang { -class IdentifierTable; -class SourceManager; - namespace format { -struct FormatStyle; + +struct UnwrappedLine; +struct UnwrappedLineNode; /// Takes a set of macro definitions as strings and allows expanding calls to /// those macros. @@ -134,6 +132,249 @@ llvm::StringMap Definitions; }; +/// Converts a sequence of UnwrappedLines containing expanded macros into a +/// single UnwrappedLine containing the macro calls. This UnwrappedLine may be +/// broken into child lines, in a way that best conveys the structure of the +/// expanded code. +/// +/// In the simplest case, a spelled UnwrappedLine contains one macro, and after +/// expanding it we have one expanded UnwrappedLine. In general, macro +/// expansions can span UnwrappedLines, and multiple macros can contribute +/// tokens to the same line. We keep consuming expanded lines until: +/// * all expansions that started have finished (we're not chopping any macros +/// in half) +/// * *and* we've reached the end of a *spelled* unwrapped line. +/// +/// A single UnwrappedLine represents this chunk of code. +/// +/// After this point, the state of the spelled/expanded stream is "in sync" +/// (both at the start of an UnwrappedLine, with no macros open), so the +/// Unexpander can be thrown away and parsing can continue. +/// +/// Given a mapping from the macro name identifier token in the macro call +/// to the tokens of the macro call, for example: +/// CLASSA -> CLASSA({public: void x();}) +/// +/// When getting the formatted lines of the expansion via the \c addLine method +/// (each '->' specifies a call to \c addLine ): +/// -> class A { +/// -> public: +/// -> void x(); +/// -> }; +/// +/// Creates the tree of unwrapped lines containing the macro call tokens so that +/// the macro call tokens fit the semantic structure of the expanded formatted +/// lines: +/// -> CLASSA({ +/// -> public: +/// -> void x(); +/// -> }) +class MacroCallReconstructor { +public: + /// Create an Reconstructor whose resulting \p UnwrappedLine will start at + /// \p Level, using the map from name identifier token to the corresponding + /// tokens of the spelled macro call. + MacroCallReconstructor( + unsigned Level, + const llvm::DenseMap> + &ActiveExpansions); + + /// For the given \p Line, match all occurences of tokens expanded from a + /// macro to unwrapped lines in the spelled macro call so that the resulting + /// tree of unwrapped lines best resembles the structure of unwrapped lines + /// passed in via \c addLine. + void addLine(const UnwrappedLine &Line); + + /// Check whether at the current state there is no open macro expansion + /// that needs to be processed to finish an macro call. + /// Only when \c finished() is true, \c takeResult() can be called to retrieve + /// the resulting \c UnwrappedLine. + /// If there are multiple subsequent macro calls within an unwrapped line in + /// the spelled token stream, the calling code may also continue to call + /// \c addLine() when \c finished() is true. + bool finished() const { return ActiveExpansions.empty(); } + + /// Retrieve the formatted \c UnwrappedLine containing the orginal + /// macro calls, formatted according to the expanded token stream received + /// via \c addLine(). + /// Generally, this line tries to have the same structure as the expanded, + /// formatted unwrapped lines handed in via \c addLine(), with the exception + /// that for multiple top-level lines, each subsequent line will be the + /// child of the last token in its predecessor. This representation is chosen + /// because it is a precondition to the formatter that we get what looks like + /// a single statement in a single \c UnwrappedLine (i.e. matching parens). + /// + /// If a token in a macro argument is a child of a token in the expansion, + /// the parent will be the corresponding token in the macro call. + /// For example: + /// #define C(a, b) class C { a b + /// C(int x;, int y;) + /// would expand to + /// class C { int x; int y; + /// where in a formatted line "int x;" and "int y;" would both be new separate + /// lines. + /// + /// In the result, "int x;" will be a child of the opening parenthesis in "C(" + /// and "int y;" will be a child of the "," token: + /// C ( + /// \- int x; + /// , + /// \- int y; + /// ) + UnwrappedLine takeResult() &&; + +private: + void add(FormatToken *Token, FormatToken *ExpandedParent, bool First); + void prepareParent(FormatToken *ExpandedParent, bool First); + FormatToken *getParentInResult(FormatToken *Parent); + void reconstruct(FormatToken *Token); + void startReconstruction(FormatToken *Token); + bool reconstructActiveCallUntil(FormatToken *Token); + void endReconstruction(FormatToken *Token); + bool processNextReconstructed(); + void finalize(); + + struct Line; + + void appendToken(FormatToken *Token, Line *L = nullptr); + UnwrappedLine createUnwrappedLine(const Line &Line, int Level); + void debug(const Line &Line, int Level); + Line &parentLine(); + Line *currentLine(); + void debugParentMap() const; + +#ifndef NDEBUG + enum ReconstructorState { + Start, // No macro expansion was found in the input yet. + InProgress, // During a macro reconstruction. + Finalized, // Past macro reconstruction, the result is finalized. + }; + ReconstructorState State = Start; +#endif + + // Node in which we build up the resulting unwrapped line; this type is + // analogous to UnwrappedLineNode. + struct LineNode { + LineNode() = default; + LineNode(FormatToken *Tok) : Tok(Tok) {} + FormatToken *Tok = nullptr; + llvm::SmallVector> Children; + }; + + // Line in which we build up the resulting unwrapped line. + // FIXME: Investigate changing UnwrappedLine to a pointer type and using it + // instead of rolling our own type. + struct Line { + llvm::SmallVector> Tokens; + }; + + // The line in which we collect the resulting reconstructed output. + // To reduce special cases in the algorithm, the first level of the line + // contains a single null token that has the reconstructed incoming + // lines as children. + // In the end, we stich the lines together so that each subsequent line + // is a child of the last token of the previous line. This is necessary + // in order to format the overall expression as a single logical line - + // if we created separate lines, we'd format them with their own top-level + // indent depending on the semantic structure, which is not desired. + Line Result; + + // Stack of currently "open" lines, where each line's predecessor's last + // token is the parent token for that line. + llvm::SmallVector ActiveReconstructedLines; + + // Maps from the expanded token to the token that takes its place in the + // reconstructed token stream in terms of parent-child relationships. + // Note that it might take multiple steps to arrive at the correct + // parent in the output. + // Given: #define C(a, b) []() { a; b; } + // And a call: C(f(), g()) + // The structure in the incoming formatted unwrapped line will be: + // []() { + // |- f(); + // \- g(); + // } + // with f and g being children of the opening brace. + // In the reconstructed call: + // C(f(), g()) + // \- f() + // \- g() + // We want f to be a child of the opening parenthesis and g to be a child + // of the comma token in the macro call. + // Thus, we map + // { -> ( + // and add + // ( -> , + // once we're past the comma in the reconstruction. + llvm::DenseMap + SpelledParentToReconstructedParent; + + // Keeps track of a single expansion while we're reconstructing tokens it + // generated. + struct Expansion { + // The identifier token of the macro call. + FormatToken *ID; + // Our current position in the reconstruction. + std::list::iterator SpelledI; + // The end of the reconstructed token sequence. + std::list::iterator SpelledE; + }; + + // Stack of macro calls for which we're in the middle of an expansion. + llvm::SmallVector ActiveExpansions; + + struct MacroCallState { + MacroCallState(Line *Line, FormatToken *ParentLastToken, + FormatToken *MacroCallLParen); + + Line *Line; + + // The last token in the parent line or expansion, or nullptr if the macro + // expansion is on a top-level line. + // + // For example, in the macro call: + // auto f = []() { ID(1); }; + // The MacroCallState for ID will have '{' as ParentLastToken. + // + // In the macro call: + // ID(ID(void f())); + // The MacroCallState of the outer ID will have nullptr as ParentLastToken, + // while the MacroCallState for the inner ID will have the '(' of the outer + // ID as ParentLastToken. + // + // In the macro call: + // ID2(a, ID(b)); + // The MacroCallState of ID will have ',' as ParentLastToken. + FormatToken *ParentLastToken; + + // The l_paren of this MacroCallState's macro call. + FormatToken *MacroCallLParen; + }; + + // Keeps track of the lines into which the opening brace/parenthesis & + // argument separating commas for each level in the macro call go in order to + // put the corresponding closing brace/parenthesis into the same line in the + // output and keep track of which parents in the expanded token stream map to + // which tokens in the reconstructed stream. + // When an opening brace/parenthesis has children, we want the structure of + // the output line to be: + // |- MACRO + // |- ( + // | \- + // |- , + // | \- + // \- ) + llvm::SmallVector MacroCallStructure; + + // Level the generated UnwrappedLine will be at. + const unsigned Level; + + // Maps from identifier of the macro call to an unwrapped line containing + // all tokens of the macro call. + const llvm::DenseMap> + &IdToReconstructed; +}; + } // namespace format } // namespace clang diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -20,6 +20,7 @@ #include "clang/Format/Format.h" #include "llvm/ADT/BitVector.h" #include "llvm/Support/Regex.h" +#include #include #include @@ -38,7 +39,7 @@ UnwrappedLine(); /// The \c Tokens comprising this \c UnwrappedLine. - std::vector Tokens; + std::list Tokens; /// The indent level of the \c UnwrappedLine. unsigned Level; diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt --- a/clang/unittests/Format/CMakeLists.txt +++ b/clang/unittests/Format/CMakeLists.txt @@ -18,6 +18,7 @@ FormatTestTableGen.cpp FormatTestTextProto.cpp FormatTestVerilog.cpp + MacroCallReconstructorTest.cpp MacroExpanderTest.cpp NamespaceEndCommentsFixerTest.cpp QualifierFixerTest.cpp diff --git a/clang/unittests/Format/MacroCallReconstructorTest.cpp b/clang/unittests/Format/MacroCallReconstructorTest.cpp new file mode 100644 --- /dev/null +++ b/clang/unittests/Format/MacroCallReconstructorTest.cpp @@ -0,0 +1,688 @@ +#include "../../lib/Format/Macros.h" +#include "../../lib/Format/UnwrappedLineParser.h" +#include "TestLexer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include +#include +#include + +namespace clang { +namespace format { +namespace { + +using UnexpandedMap = + llvm::DenseMap>; + +// Keeps track of a sequence of macro expansions. +// +// The expanded tokens are accessible via getTokens(), while a map of macro call +// identifier token to unexpanded token stream is accessible via +// getUnexpanded(). +class Expansion { +public: + Expansion(TestLexer &Lex, MacroExpander &Macros) : Lex(Lex), Macros(Macros) {} + + // Appends the token stream obtained from expanding the macro Name given + // the provided arguments, to be later retrieved with getTokens(). + // Returns the list of tokens making up the unexpanded macro call. + TokenList + expand(llvm::StringRef Name, + const SmallVector, 1> &Args) { + auto *ID = Lex.id(Name); + auto UnexpandedLine = std::make_unique(); + UnexpandedLine->Tokens.push_back(ID); + if (!Args.empty()) { + UnexpandedLine->Tokens.push_back(Lex.id("(")); + for (auto I = Args.begin(), E = Args.end(); I != E; ++I) { + if (I != Args.begin()) + UnexpandedLine->Tokens.push_back(Lex.id(",")); + UnexpandedLine->Tokens.insert(UnexpandedLine->Tokens.end(), I->begin(), + I->end()); + } + UnexpandedLine->Tokens.push_back(Lex.id(")")); + } + Unexpanded[ID] = std::move(UnexpandedLine); + + auto Expanded = uneof(Macros.expand(ID, Args)); + Tokens.append(Expanded.begin(), Expanded.end()); + + TokenList UnexpandedTokens; + for (const UnwrappedLineNode &Node : Unexpanded[ID]->Tokens) { + UnexpandedTokens.push_back(Node.Tok); + } + return UnexpandedTokens; + } + + TokenList expand(llvm::StringRef Name, + const std::vector &Args = {}) { + return expand(Name, lexArgs(Args)); + } + + const UnexpandedMap &getUnexpanded() const { return Unexpanded; } + + const TokenList &getTokens() const { return Tokens; } + +private: + llvm::SmallVector + lexArgs(const std::vector &Args) { + llvm::SmallVector Result; + for (const auto &Arg : Args) { + Result.push_back(uneof(Lex.lex(Arg))); + } + return Result; + } + llvm::DenseMap> Unexpanded; + llvm::SmallVector Tokens; + TestLexer &Lex; + MacroExpander &Macros; +}; + +struct Chunk { + Chunk(llvm::ArrayRef Tokens) + : Tokens(Tokens.begin(), Tokens.end()) {} + Chunk(llvm::ArrayRef Children) + : Children(Children.begin(), Children.end()) {} + llvm::SmallVector Tokens; + llvm::SmallVector Children; +}; + +bool tokenMatches(const FormatToken *Left, const FormatToken *Right) { + if (Left->getType() == Right->getType() && + Left->TokenText == Right->TokenText) + return true; + llvm::dbgs() << Left->TokenText << " != " << Right->TokenText << "\n"; + return false; +} + +// Allows to produce chunks of a token list by typing the code of equal tokens. +// +// Created from a list of tokens, users call "consume" to get the next chunk +// of tokens, checking that they match the written code. +struct Matcher { + Matcher(const TokenList &Tokens, TestLexer &Lex) + : Tokens(Tokens), It(this->Tokens.begin()), Lex(Lex) {} + + Chunk consume(StringRef Tokens) { + TokenList Result; + for (const FormatToken *Token : uneof(Lex.lex(Tokens))) { + assert(tokenMatches(*It, Token)); + Result.push_back(*It); + ++It; + } + return Chunk(Result); + } + + TokenList Tokens; + TokenList::iterator It; + TestLexer &Lex; +}; + +UnexpandedMap mergeUnexpanded(const UnexpandedMap &M1, + const UnexpandedMap &M2) { + UnexpandedMap Result; + for (const auto &KV : M1) { + Result[KV.first] = std::make_unique(*KV.second); + } + for (const auto &KV : M2) { + Result[KV.first] = std::make_unique(*KV.second); + } + return Result; +} + +class MacroCallReconstructorTest : public ::testing::Test { +public: + MacroCallReconstructorTest() : Lex(Allocator, Buffers) {} + + std::unique_ptr + createExpander(const std::vector &MacroDefinitions) { + return std::make_unique(MacroDefinitions, + Lex.SourceMgr.get(), Lex.Style, + Lex.Allocator, Lex.IdentTable); + } + + UnwrappedLine line(llvm::ArrayRef Tokens) { + UnwrappedLine Result; + for (FormatToken *Tok : Tokens) { + Result.Tokens.push_back(UnwrappedLineNode(Tok)); + } + return Result; + } + + UnwrappedLine line(llvm::StringRef Text) { return line({lex(Text)}); } + + UnwrappedLine line(llvm::ArrayRef Chunks) { + UnwrappedLine Result; + for (const Chunk &Chunk : Chunks) { + Result.Tokens.insert(Result.Tokens.end(), Chunk.Tokens.begin(), + Chunk.Tokens.end()); + assert(!Result.Tokens.empty()); + Result.Tokens.back().Children.append(Chunk.Children.begin(), + Chunk.Children.end()); + } + return Result; + } + + TokenList lex(llvm::StringRef Text) { return uneof(Lex.lex(Text)); } + + Chunk tokens(llvm::StringRef Text) { return Chunk(lex(Text)); } + + Chunk children(llvm::ArrayRef Children) { + return Chunk(Children); + } + + llvm::SpecificBumpPtrAllocator Allocator; + std::vector> Buffers; + TestLexer Lex; +}; + +bool matchesTokens(const UnwrappedLine &L1, const UnwrappedLine &L2) { + if (L1.Tokens.size() != L2.Tokens.size()) + return false; + for (auto L1It = L1.Tokens.begin(), L2It = L2.Tokens.begin(); + L1It != L1.Tokens.end(); ++L1It, ++L2It) { + if (L1It->Tok != L2It->Tok) + return false; + if (L1It->Children.size() != L2It->Children.size()) + return false; + for (auto L1ChildIt = L1It->Children.begin(), + L2ChildIt = L2It->Children.begin(); + L1ChildIt != L1It->Children.end(); ++L1ChildIt, ++L2ChildIt) { + if (!matchesTokens(*L1ChildIt, *L2ChildIt)) + return false; + } + } + return true; +} +MATCHER_P(matchesLine, line, "") { return matchesTokens(arg, line); } + +TEST_F(MacroCallReconstructorTest, Identifier) { + auto Macros = createExpander({"X=x"}); + Expansion Exp(Lex, *Macros); + TokenList Call = Exp.expand("X"); + + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Unexp.addLine(line(Exp.getTokens())); + EXPECT_TRUE(Unexp.finished()); + Matcher U(Call, Lex); + EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(line(U.consume("X")))); +} + +TEST_F(MacroCallReconstructorTest, NestedLineWithinCall) { + auto Macros = createExpander({"C(a)=class X { a; };"}); + Expansion Exp(Lex, *Macros); + TokenList Call = Exp.expand("C", {"void f()"}); + + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Matcher E(Exp.getTokens(), Lex); + Unexp.addLine(line(E.consume("class X {"))); + EXPECT_FALSE(Unexp.finished()); + Unexp.addLine(line(E.consume("void f();"))); + EXPECT_FALSE(Unexp.finished()); + Unexp.addLine(line(E.consume("};"))); + EXPECT_TRUE(Unexp.finished()); + Matcher U(Call, Lex); + EXPECT_THAT(std::move(Unexp).takeResult(), + matchesLine(line(U.consume("C(void f())")))); +} + +TEST_F(MacroCallReconstructorTest, MultipleLinesInNestedMultiParamsExpansion) { + auto Macros = createExpander({"C(a, b)=a b", "B(a)={a}"}); + Expansion Exp1(Lex, *Macros); + TokenList Call1 = Exp1.expand("B", {"b"}); + Expansion Exp2(Lex, *Macros); + TokenList Call2 = Exp2.expand("C", {uneof(Lex.lex("a")), Exp1.getTokens()}); + + UnexpandedMap Unexpanded = + mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded()); + MacroCallReconstructor Unexp(0, Unexpanded); + Matcher E(Exp2.getTokens(), Lex); + Unexp.addLine(line(E.consume("a"))); + EXPECT_FALSE(Unexp.finished()); + Unexp.addLine(line(E.consume("{"))); + EXPECT_FALSE(Unexp.finished()); + Unexp.addLine(line(E.consume("b"))); + EXPECT_FALSE(Unexp.finished()); + Unexp.addLine(line(E.consume("}"))); + EXPECT_TRUE(Unexp.finished()); + + Matcher U1(Call1, Lex); + auto Middle = U1.consume("B(b)"); + Matcher U2(Call2, Lex); + auto Chunk1 = U2.consume("C(a, "); + auto Chunk2 = U2.consume("{ b }"); + auto Chunk3 = U2.consume(")"); + + EXPECT_THAT(std::move(Unexp).takeResult(), + matchesLine(line({Chunk1, Middle, Chunk3}))); +} + +TEST_F(MacroCallReconstructorTest, StatementSequence) { + auto Macros = createExpander({"SEMI=;"}); + Expansion Exp(Lex, *Macros); + TokenList Call1 = Exp.expand("SEMI"); + TokenList Call2 = Exp.expand("SEMI"); + TokenList Call3 = Exp.expand("SEMI"); + + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Matcher E(Exp.getTokens(), Lex); + Unexp.addLine(line(E.consume(";"))); + EXPECT_TRUE(Unexp.finished()); + Unexp.addLine(line(E.consume(";"))); + EXPECT_TRUE(Unexp.finished()); + Unexp.addLine(line(E.consume(";"))); + EXPECT_TRUE(Unexp.finished()); + Matcher U1(Call1, Lex); + Matcher U2(Call2, Lex); + Matcher U3(Call3, Lex); + EXPECT_THAT(std::move(Unexp).takeResult(), + matchesLine(line( + {U1.consume("SEMI"), + children({line({U2.consume("SEMI"), + children({line(U3.consume("SEMI"))})})})}))); +} + +TEST_F(MacroCallReconstructorTest, NestedBlock) { + auto Macros = createExpander({"ID(x)=x"}); + // Test: ID({ ID(a *b); }) + // 1. expand ID(a *b) -> a *b + Expansion Exp1(Lex, *Macros); + TokenList Call1 = Exp1.expand("ID", {"a *b"}); + // 2. expand ID({ a *b; }) + TokenList Arg; + Arg.push_back(Lex.id("{")); + Arg.append(Exp1.getTokens().begin(), Exp1.getTokens().end()); + Arg.push_back(Lex.id(";")); + Arg.push_back(Lex.id("}")); + Expansion Exp2(Lex, *Macros); + TokenList Call2 = Exp2.expand("ID", {Arg}); + + // Consume as-if formatted: + // { + // a *b; + // } + UnexpandedMap Unexpanded = + mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded()); + MacroCallReconstructor Unexp(0, Unexpanded); + Matcher E(Exp2.getTokens(), Lex); + Unexp.addLine(line(E.consume("{"))); + EXPECT_FALSE(Unexp.finished()); + Unexp.addLine(line(E.consume("a *b;"))); + EXPECT_FALSE(Unexp.finished()); + Unexp.addLine(line(E.consume("}"))); + EXPECT_TRUE(Unexp.finished()); + + // Expect lines: + // ID({ + // ID(a *b); + // }) + Matcher U1(Call1, Lex); + Matcher U2(Call2, Lex); + auto Chunk2Start = U2.consume("ID("); + auto Chunk2LBrace = U2.consume("{"); + U2.consume("a *b"); + auto Chunk2Mid = U2.consume(";"); + auto Chunk2RBrace = U2.consume("}"); + auto Chunk2End = U2.consume(")"); + auto Chunk1 = U1.consume("ID(a *b)"); + + auto Expected = line({Chunk2Start, + children({ + line(Chunk2LBrace), + line({Chunk1, Chunk2Mid}), + line(Chunk2RBrace), + }), + Chunk2End}); + EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected)); +} + +TEST_F(MacroCallReconstructorTest, NestedChildBlocks) { + auto Macros = createExpander({"ID(x)=x", "CALL(x)=f([] { x })"}); + // Test: ID(CALL(CALL(return a * b;))) + // 1. expand CALL(return a * b;) + Expansion Exp1(Lex, *Macros); + TokenList Call1 = Exp1.expand("CALL", {"return a * b;"}); + // 2. expand CALL(f([] { return a * b; })) + Expansion Exp2(Lex, *Macros); + TokenList Call2 = Exp2.expand("CALL", {Exp1.getTokens()}); + // 3. expand ID({ f([] { f([] { return a * b; }) }) }) + TokenList Arg3; + Arg3.push_back(Lex.id("{")); + Arg3.append(Exp2.getTokens().begin(), Exp2.getTokens().end()); + Arg3.push_back(Lex.id("}")); + Expansion Exp3(Lex, *Macros); + TokenList Call3 = Exp3.expand("ID", {Arg3}); + + // Consume as-if formatted in three unwrapped lines: + // 0: { + // 1: f([] { + // f([] { + // return a * b; + // }) + // }) + // 2: } + UnexpandedMap Unexpanded = mergeUnexpanded( + Exp1.getUnexpanded(), + mergeUnexpanded(Exp2.getUnexpanded(), Exp3.getUnexpanded())); + MacroCallReconstructor Unexp(0, Unexpanded); + Matcher E(Exp3.getTokens(), Lex); + Unexp.addLine(line(E.consume("{"))); + Unexp.addLine( + line({E.consume("f([] {"), + children({line({E.consume("f([] {"), + children({line(E.consume("return a * b;"))}), + E.consume("})")})}), + E.consume("})")})); + Unexp.addLine(line(E.consume("}"))); + EXPECT_TRUE(Unexp.finished()); + + // Expect lines: + // ID( + // { + // CALL(CALL(return a * b;)) + // } + // ) + Matcher U1(Call1, Lex); + Matcher U2(Call2, Lex); + Matcher U3(Call3, Lex); + auto Chunk3Start = U3.consume("ID("); + auto Chunk3LBrace = U3.consume("{"); + U3.consume("f([] { f([] { return a * b; }) })"); + auto Chunk3RBrace = U3.consume("}"); + auto Chunk3End = U3.consume(")"); + auto Chunk2Start = U2.consume("CALL("); + U2.consume("f([] { return a * b; })"); + auto Chunk2End = U2.consume(")"); + auto Chunk1 = U1.consume("CALL(return a * b;)"); + + auto Expected = line({ + Chunk3Start, + children({ + line(Chunk3LBrace), + line({ + Chunk2Start, + Chunk1, + Chunk2End, + }), + line(Chunk3RBrace), + }), + Chunk3End, + }); + EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected)); +} + +TEST_F(MacroCallReconstructorTest, NestedChildrenMultipleArguments) { + auto Macros = createExpander({"CALL(a, b)=f([] { a; b; })"}); + Expansion Exp(Lex, *Macros); + TokenList Call = Exp.expand("CALL", {std::string("int a"), "int b"}); + + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Matcher E(Exp.getTokens(), Lex); + Unexp.addLine(line({ + E.consume("f([] {"), + children({ + line(E.consume("int a;")), + line(E.consume("int b;")), + }), + E.consume("})"), + })); + EXPECT_TRUE(Unexp.finished()); + Matcher U(Call, Lex); + auto Expected = line(U.consume("CALL(int a, int b)")); + EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected)); +} + +TEST_F(MacroCallReconstructorTest, ReverseOrderArgumentsInExpansion) { + auto Macros = createExpander({"CALL(a, b)=b + a"}); + Expansion Exp(Lex, *Macros); + TokenList Call = Exp.expand("CALL", {std::string("x"), "y"}); + + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Matcher E(Exp.getTokens(), Lex); + Unexp.addLine(line(E.consume("y + x"))); + EXPECT_TRUE(Unexp.finished()); + Matcher U(Call, Lex); + auto Expected = line(U.consume("CALL(x, y)")); + EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected)); +} + +TEST_F(MacroCallReconstructorTest, MultipleToplevelUnwrappedLines) { + auto Macros = createExpander({"ID(a, b)=a b"}); + Expansion Exp(Lex, *Macros); + TokenList Call = Exp.expand("ID", {std::string("x; x"), "y"}); + + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Matcher E(Exp.getTokens(), Lex); + Unexp.addLine(line(E.consume("x;"))); + Unexp.addLine(line(E.consume("x y"))); + EXPECT_TRUE(Unexp.finished()); + Matcher U(Call, Lex); + auto Expected = line({ + U.consume("ID("), + children({ + line(U.consume("x;")), + line(U.consume("x")), + }), + U.consume(", y)"), + }); + EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected)); +} + +TEST_F(MacroCallReconstructorTest, NestedCallsMultipleLines) { + auto Macros = createExpander({"ID(x)=x"}); + // Test: ID({ID(a * b);}) + // 1. expand ID(a * b) + Expansion Exp1(Lex, *Macros); + TokenList Call1 = Exp1.expand("ID", {"a * b"}); + // 2. expand ID({ a * b; }) + Expansion Exp2(Lex, *Macros); + TokenList Arg2; + Arg2.push_back(Lex.id("{")); + Arg2.append(Exp1.getTokens().begin(), Exp1.getTokens().end()); + Arg2.push_back(Lex.id(";")); + Arg2.push_back(Lex.id("}")); + TokenList Call2 = Exp2.expand("ID", {Arg2}); + + // Consume as-if formatted in three unwrapped lines: + // 0: { + // 1: a * b; + // 2: } + UnexpandedMap Unexpanded = + mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded()); + MacroCallReconstructor Unexp(0, Unexpanded); + Matcher E(Exp2.getTokens(), Lex); + Unexp.addLine(line(E.consume("{"))); + Unexp.addLine(line(E.consume("a * b;"))); + Unexp.addLine(line(E.consume("}"))); + EXPECT_TRUE(Unexp.finished()); + + // Expect lines: + // ID( + // { + // ID(a * b); + // } + // ) + Matcher U1(Call1, Lex); + Matcher U2(Call2, Lex); + auto Chunk2Start = U2.consume("ID("); + auto Chunk2LBrace = U2.consume("{"); + U2.consume("a * b"); + auto Chunk2Semi = U2.consume(";"); + auto Chunk2RBrace = U2.consume("}"); + auto Chunk2End = U2.consume(")"); + auto Chunk1 = U1.consume("ID(a * b)"); + + auto Expected = line({ + Chunk2Start, + children({ + line({Chunk2LBrace}), + line({Chunk1, Chunk2Semi}), + line({Chunk2RBrace}), + }), + Chunk2End, + }); + EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected)); +} + +TEST_F(MacroCallReconstructorTest, ParentOutsideMacroCall) { + auto Macros = createExpander({"ID(a)=a"}); + Expansion Exp(Lex, *Macros); + TokenList Call = Exp.expand("ID", {std::string("x; y; z;")}); + + auto Prefix = tokens("int a = []() {"); + auto Postfix = tokens("}();"); + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Matcher E(Exp.getTokens(), Lex); + Unexp.addLine(line({ + Prefix, + children({ + line(E.consume("x;")), + line(E.consume("y;")), + line(E.consume("z;")), + }), + Postfix, + })); + EXPECT_TRUE(Unexp.finished()); + Matcher U(Call, Lex); + auto Expected = line({ + Prefix, + children({ + line({ + U.consume("ID("), + children({ + line(U.consume("x;")), + line(U.consume("y;")), + line(U.consume("z;")), + }), + U.consume(")"), + }), + }), + Postfix, + }); + EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected)); +} + +TEST_F(MacroCallReconstructorTest, UnusedMacroArguments) { + auto Macros = createExpander({"X=x"}); + Expansion Exp(Lex, *Macros); + TokenList Call = Exp.expand("X", {"a", "b", "c"}); + + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Unexp.addLine(line(Exp.getTokens())); + EXPECT_TRUE(Unexp.finished()); + Matcher U(Call, Lex); + EXPECT_THAT(std::move(Unexp).takeResult(), + matchesLine(line(U.consume("X(a, b, c)")))); +} + +TEST_F(MacroCallReconstructorTest, UnusedEmptyMacroArgument) { + auto Macros = createExpander({"X=x"}); + Expansion Exp(Lex, *Macros); + TokenList Call = Exp.expand("X", {std::string("")}); + + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Matcher E(Exp.getTokens(), Lex); + auto Semi = tokens(";"); + Unexp.addLine(line({E.consume("x"), Semi})); + EXPECT_TRUE(Unexp.finished()); + Matcher U(Call, Lex); + EXPECT_THAT(std::move(Unexp).takeResult(), + matchesLine(line({U.consume("X()"), Semi}))); +} + +TEST_F(MacroCallReconstructorTest, ChildrenSplitAcrossArguments) { + auto Macros = createExpander({"CALL(a, b)=f([]() a b)"}); + Expansion Exp(Lex, *Macros); + TokenList Call = Exp.expand("CALL", {std::string("{ a;"), "b; }"}); + + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Matcher E(Exp.getTokens(), Lex); + Unexp.addLine(line({ + E.consume("f([]() {"), + children({ + line(E.consume("a;")), + line(E.consume("b;")), + }), + E.consume("})"), + })); + EXPECT_TRUE(Unexp.finished()); + Matcher U(Call, Lex); + auto Expected = line({ + U.consume("CALL({"), + children(line(U.consume("a;"))), + U.consume(", b; })"), + }); + EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected)); +} + +TEST_F(MacroCallReconstructorTest, ChildrenAfterMacroCall) { + auto Macros = createExpander({"CALL(a, b)=f([]() a b"}); + Expansion Exp(Lex, *Macros); + TokenList Call = Exp.expand("CALL", {std::string("{ a"), "b"}); + + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Matcher E(Exp.getTokens(), Lex); + auto Semi = tokens(";"); + auto SecondLine = tokens("c d;"); + auto ThirdLine = tokens("e f;"); + auto Postfix = tokens("})"); + Unexp.addLine(line({ + E.consume("f([]() {"), + children({ + line({E.consume("a b"), Semi}), + line(SecondLine), + line(ThirdLine), + }), + Postfix, + })); + EXPECT_TRUE(Unexp.finished()); + Matcher U(Call, Lex); + auto Expected = line({ + U.consume("CALL({"), + children(line(U.consume("a"))), + U.consume(", b)"), + Semi, + children(line({ + SecondLine, + children(line({ + ThirdLine, + Postfix, + })), + })), + }); + EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected)); +} + +TEST_F(MacroCallReconstructorTest, InvalidCodeSplittingBracesAcrossArgs) { + auto Macros = createExpander({"M(a, b)=(a) (b)"}); + Expansion Exp(Lex, *Macros); + TokenList Call = Exp.expand("M", {std::string("{"), "x", ""}); + + MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); + Matcher E(Exp.getTokens(), Lex); + auto Prefix = tokens("({"); + Unexp.addLine(line({ + Prefix, + children({ + line({ + E.consume("({"), + children({line(E.consume(")(x)"))}), + }), + }), + })); + EXPECT_TRUE(Unexp.finished()); + Matcher U(Call, Lex); + auto Expected = line({ + Prefix, + children({line(U.consume("M({,x,)"))}), + }); + EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected)); +} + +} // namespace +} // namespace format +} // namespace clang