diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt --- a/clang/lib/Format/CMakeLists.txt +++ b/clang/lib/Format/CMakeLists.txt @@ -7,6 +7,7 @@ Format.cpp FormatToken.cpp FormatTokenLexer.cpp + MacroExpander.cpp NamespaceEndCommentsFixer.cpp SortJavaScriptImports.cpp TokenAnalyzer.cpp diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -134,6 +134,57 @@ enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break }; +/// Roles a token can take in a configured macro expansion. +enum MacroRole { + /// The token was not expanded from a macro call. + MR_None, + /// The token was expanded from a macro argument when formatting the expanded + /// token sequence. + MR_ExpandedArg, + /// The token is part of a macro argument that was previously formatted as + /// expansion when formatting the unexpanded macro call. + MR_UnexpandedArg, + /// The token was expanded from a macro definition, and is not visible as part + /// of the macro call. + MR_Hidden, +}; + +struct FormatToken; +struct MacroContext { + /// The token's role in the macro expansion. + /// When formatting an expanded macro, all tokens that are part of macro + /// arguments will be MR_ExpandedArg, while all tokens that are not visible in + /// the macro call will be MR_Hidden. + /// When formatting an unexpanded macro call, all tokens that are part of + /// macro arguments will be MR_UnexpandedArg. + MacroRole Role = MR_None; + + /// The stack of macro call identifier tokens this token was expanded from. + /// Given the definition: P(a) (a) + /// And the call: P( { P(x) } ) + /// \- P0 \- P1 + /// ExpandedFrom stacks for each generated token will be: + /// ( -> P0 + /// { -> P0 + /// ( -> P0, P1 + /// x -> P0, P1 + /// ) -> P0, P1 + /// } -> P0 + /// ) -> P0 + llvm::SmallVector ExpandedFrom; + + /// Whether this token is the first token in a macro expansion. 
+ bool StartOfExpansion = false; + + /// The number of currently open expansions in \c ExpandedFrom this token is + /// the last token in. + size_t EndOfExpansion = 0; + + /// When macro expansion introduces parents, those are marked as + /// \c MacroParent, so formatting knows their children need to be formatted. + bool MacroParent = false; +}; + class TokenRole; class AnnotatedLine; @@ -228,7 +279,9 @@ /// A token can have a special role that can carry extra information /// about the token's formatting. - std::unique_ptr Role; + /// FIXME: Make FormatToken for parsing and AnnotatedToken two different + /// classes and make this a unique_ptr in the AnnotatedToken class. + std::shared_ptr Role; /// If this is an opening parenthesis, how are the parameters packed? ParameterPackingKind PackingKind = PPK_Inconclusive; @@ -331,6 +384,10 @@ /// changes. bool Finalized = false; + // Contains all attributes related to how this token takes part + // in a configured macro expansion. + MacroContext MacroCtx; + bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } bool is(TokenType TT) const { return Type == TT; } bool is(const IdentifierInfo *II) const { @@ -572,10 +629,12 @@ : nullptr; } + void copyInto(FormatToken &Tok) { Tok = *this; } + private: - // Disallow copying. + // Only allow copying via the explicit copyInto method. FormatToken(const FormatToken &) = delete; - void operator=(const FormatToken &) = delete; + FormatToken &operator=(const FormatToken &) = default; template bool startsSequenceInternal(A K1, Ts... Tokens) const { diff --git a/clang/lib/Format/MacroExpander.h b/clang/lib/Format/MacroExpander.h new file mode 100644 --- /dev/null +++ b/clang/lib/Format/MacroExpander.h @@ -0,0 +1,86 @@ +//===--- MacroExpander.h - Format C++ code ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of MacroExpander, which handles macro +/// configuration and expansion while formatting. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_MACRO_EXPANDER_H +#define LLVM_CLANG_LIB_FORMAT_MACRO_EXPANDER_H + +#include +#include +#include + +#include "Encoding.h" +#include "FormatToken.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { +class MemoryBuffer; +} + +namespace clang { +class IdentifierTable; +class SourceManager; + +namespace format { +struct FormatStyle; + +/// Takes a set of simple macro definitions as strings and allows expanding +/// calls to those macros. +class MacroExpander { +public: + typedef llvm::ArrayRef> ArgsList; + + /// Construct a macro expander from a set of macro definitions. + /// + /// Each entry in \p Macros must conform to the following simple + /// macro-definition language: + /// <def> ::= <id> <exp> | <id> "(" <params> ")" <exp> + /// <params> ::= "" | <id> | <id> "," <params> + /// <exp> ::= <eof> | <tok> <exp> + /// + MacroExpander(const std::vector &Macros, + clang::SourceManager &SourceMgr, const FormatStyle &Style, + encoding::Encoding encoding, + llvm::SpecificBumpPtrAllocator &Allocator, + IdentifierTable &IdentTable); + ~MacroExpander(); + + /// Returns whether a macro \p Name is defined. + bool defined(llvm::StringRef Name); + + /// Returns the expanded stream of format tokens for \p ID, where + /// each element in \p Args is a positional argument to the macro call.
+ llvm::SmallVector expand(FormatToken *ID, ArgsList Args); + +private: + struct Definition; + class DefinitionParser; + + void parseDefinitions(const std::vector &MacroExpander); + + clang::SourceManager &SourceMgr; + const FormatStyle &Style; + encoding::Encoding Encoding; + llvm::SpecificBumpPtrAllocator &Allocator; + IdentifierTable &IdentTable; + std::vector> Buffers; + llvm::StringMap Definitions; +}; + +} // namespace format +} // namespace clang + +#endif diff --git a/clang/lib/Format/MacroExpander.cpp b/clang/lib/Format/MacroExpander.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Format/MacroExpander.cpp @@ -0,0 +1,205 @@ +//===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the implementation of MacroExpander, which handles macro +/// configuration and expansion while formatting. +/// +//===----------------------------------------------------------------------===// + +#include "MacroExpander.h" + +#include "FormatToken.h" +#include "FormatTokenLexer.h" +#include "clang/Format/Format.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/HeaderSearchOptions.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/ModuleLoader.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang { +namespace format { + +struct MacroExpander::Definition { + StringRef Name; + SmallVector Params; + SmallVector Tokens; +}; + +// A simple macro parser.
+class MacroExpander::DefinitionParser { +public: + DefinitionParser(ArrayRef Tokens) : Tokens(Tokens) { + assert(!Tokens.empty()); + Current = Tokens[0]; + } + + // Parse the token stream and return the corresponding Definition object. + // Returns an empty definition object with a null-Name on error. + MacroExpander::Definition parse() { + if (!Current->is(tok::identifier)) + return {}; + Def.Name = Current->TokenText; + nextToken(); + if (Current->is(tok::l_paren)) { + if (!parseParams()) + return {}; + } + parseExpansion(); + return Def; + } + +private: + bool parseParams() { + if (Current->isNot(tok::l_paren)) + return false; + nextToken(); + while (Current->is(tok::identifier)) { + Def.Params.push_back(Current); + nextToken(); + if (Current->isNot(tok::comma)) + break; + nextToken(); + } + if (Current->isNot(tok::r_paren)) + return false; + nextToken(); + return true; + } + + void parseExpansion() { + while (Current->isNot(tok::eof)) { + Def.Tokens.push_back(Current); + nextToken(); + } + Def.Tokens.push_back(Current); + } + + void nextToken() { + if (Pos + 1 < Tokens.size()) + ++Pos; + Current = Tokens[Pos]; + Current->Finalized = true; + } + + size_t Pos = 0; + FormatToken *Current = nullptr; + Definition Def; + ArrayRef Tokens; +}; + +MacroExpander::MacroExpander( + const std::vector &Macros, clang::SourceManager &SourceMgr, + const FormatStyle &Style, encoding::Encoding Encoding, + llvm::SpecificBumpPtrAllocator &Allocator, + IdentifierTable &IdentTable) + : SourceMgr(SourceMgr), Style(Style), Encoding(Encoding), + Allocator(Allocator), IdentTable(IdentTable) { + parseDefinitions(Macros); +} + +MacroExpander::~MacroExpander() {} + +void MacroExpander::parseDefinitions( + const std::vector &Macros) { + for (const std::string &Macro : Macros) { + Buffers.push_back( + llvm::MemoryBuffer::getMemBufferCopy(Macro, "")); + clang::FileID FID = + SourceMgr.createFileID(SourceManager::Unowned, Buffers.back().get()); + FormatTokenLexer Lex(SourceMgr,
FID, 0, Style, Encoding, Allocator, + IdentTable); + DefinitionParser Parser(Lex.lex()); + auto Definition = Parser.parse(); + // Skip malformed definitions; parse() returns a null Name for those. + if (!Definition.Name.empty()) + Definitions[Definition.Name] = Definition; + } +} + +bool MacroExpander::defined(llvm::StringRef Name) { + return Definitions.find(Name) != Definitions.end(); +} + +llvm::SmallVector MacroExpander::expand(FormatToken *ID, + ArgsList Args) { + assert(defined(ID->TokenText)); + SmallVector Result; + const Definition &Def = Definitions[ID->TokenText]; + + // Map from each argument's name to its position in the argument list. + // With "M(x, y) x + y": + // x -> 0 + // y -> 1 + llvm::StringMap ArgMap; + for (size_t I = 0, E = Def.Params.size(); I != E; ++I) + ArgMap[Def.Params[I]->TokenText] = I; + bool First = true; + + // Adds the given token to Result. + auto pushToken = [&](FormatToken *Tok) { + Tok->MacroCtx.ExpandedFrom.push_back(ID); + if (First) + Tok->MacroCtx.StartOfExpansion = true; + Result.push_back(Tok); + First = false; + }; + + // If Tok references a parameter, adds the corresponding argument to Result. + // Returns false if Tok does not reference a parameter. + auto expandArgument = [&](FormatToken *Tok) -> bool { + // If the current token references a parameter, expand the corresponding + // argument. + if (!Tok->is(tok::identifier)) + return false; + auto I = ArgMap.find(Tok->TokenText); + if (I == ArgMap.end()) + return false; + // If there are fewer arguments than referenced parameters, skip the + // parameter. + // FIXME: Potentially fully abort the expansion instead. + if (I->getValue() >= Args.size()) + return true; + for (FormatToken *Arg : Args[I->getValue()]) { + // A token can be part of multiple macro arguments. + // For example, with "ID(x) x": + // in ID(ID(x)), 'x' is expanded first as argument to the inner + // ID, then again as argument to the outer ID. We keep the macro + // role the token had from the inner expansion.
+ if (Arg->MacroCtx.Role == MR_None) + Arg->MacroCtx.Role = MR_ExpandedArg; + pushToken(Arg); + } + return true; + }; + + // Expand the definition into Result. + for (FormatToken *Tok : Def.Tokens) { + if (expandArgument(Tok)) + continue; + // Create a copy of the tokens that were not part of the macro argument, + // i.e. were not provided by user code. + FormatToken *New = new (Allocator.Allocate()) FormatToken; + Tok->copyInto(*New); + assert(New->MacroCtx.Role == MR_None); + // Tokens that are not part of the user code do not need to be formatted. + New->MacroCtx.Role = MR_Hidden; + pushToken(New); + } + assert(!Result.empty()); + if (Result.size() > 1) + ++Result[Result.size() - 2]->MacroCtx.EndOfExpansion; + return Result; +} + +} // namespace format +} // namespace clang diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt --- a/clang/unittests/Format/CMakeLists.txt +++ b/clang/unittests/Format/CMakeLists.txt @@ -15,6 +15,7 @@ FormatTestSelective.cpp FormatTestTableGen.cpp FormatTestTextProto.cpp + MacroExpanderTest.cpp NamespaceEndCommentsFixerTest.cpp SortImportsTestJS.cpp SortImportsTestJava.cpp diff --git a/clang/unittests/Format/MacroExpanderTest.cpp b/clang/unittests/Format/MacroExpanderTest.cpp new file mode 100644 --- /dev/null +++ b/clang/unittests/Format/MacroExpanderTest.cpp @@ -0,0 +1,167 @@ +#include "../../lib/Format/MacroExpander.h" +#include "TestLexer.h" +#include "clang/Basic/FileManager.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace format { + +namespace { + +class MacroExpanderTest : public ::testing::Test { +public: + std::unique_ptr + create(const std::vector &MacroDefinitions) { + return std::make_unique( + MacroDefinitions, Lex.SourceMgr.get(), Lex.Style, Lex.Encoding, + Lex.Allocator, Lex.IdentTable); + } + + std::string expand(MacroExpander &Macros, llvm::StringRef Name, + const std::vector &Args = {}) { +
EXPECT_TRUE(Macros.defined(Name)); + return text(Macros.expand(Lex.id(Name), lexArgs(Args))); + } + + llvm::SmallVector + lexArgs(const std::vector &Args) { + llvm::SmallVector Result; + for (const auto &Arg : Args) { + Result.push_back(uneof(Lex.lex(Arg))); + } + return Result; + } + + struct MacroAttributes { + clang::tok::TokenKind Kind; + MacroRole Role; + bool Start; + size_t End; + llvm::SmallVector ExpandedFrom; + }; + + void expectAttributes(const TokenList &Tokens, + const std::vector &Attributes) { + EXPECT_EQ(Tokens.size(), Attributes.size()) << text(Tokens); + for (size_t I = 0, E = Tokens.size(); I != E; ++I) { + if (I >= Attributes.size()) + continue; + std::string Context = + ("for token " + llvm::Twine(I) + ": " + Tokens[I]->Tok.getName() + + " / " + Tokens[I]->TokenText) + .str(); + EXPECT_TRUE(Tokens[I]->is(Attributes[I].Kind)) + << Context << " in " << text(Tokens); + EXPECT_EQ(Tokens[I]->MacroCtx.Role, Attributes[I].Role) + << Context << " in " << text(Tokens); + EXPECT_EQ(Tokens[I]->MacroCtx.StartOfExpansion, Attributes[I].Start) + << Context << " in " << text(Tokens); + EXPECT_EQ(Tokens[I]->MacroCtx.EndOfExpansion, Attributes[I].End) + << Context << " in " << text(Tokens); + EXPECT_EQ(Tokens[I]->MacroCtx.ExpandedFrom, Attributes[I].ExpandedFrom) + << Context << " in " << text(Tokens); + } + } + + TestLexer Lex; +}; + +TEST_F(MacroExpanderTest, SkipsDefinitionOnError) { + auto Macros = + create({"A(", "B(,", "C(a,", "D(a a", "E(a, a", "F(,)", "G(a;"}); + for (const auto *Name : {"A", "B", "C", "D", "E", "F", "G"}) { + EXPECT_FALSE(Macros->defined(Name)) << "for Name " << Name; + } +} + +TEST_F(MacroExpanderTest, ExpandsWithoutArguments) { + auto Macros = create({ + "A", + "B b", + "C c + c", + "D()", + }); + EXPECT_EQ("", expand(*Macros, "A")); + EXPECT_EQ("b", expand(*Macros, "B")); + EXPECT_EQ("c+c", expand(*Macros, "C")); + EXPECT_EQ("", expand(*Macros, "D")); +} + +TEST_F(MacroExpanderTest, ExpandsWithArguments) { + auto Macros = create({ + 
"A(x)", + "B(x, y) x + y", + }); + EXPECT_EQ("", expand(*Macros, "A", {"a"})); + EXPECT_EQ("b1+b2+b3", expand(*Macros, "B", {"b1", "b2 + b3"})); + EXPECT_EQ("x+", expand(*Macros, "B", {"x"})); +} + +TEST_F(MacroExpanderTest, AttributizesTokens) { + auto Macros = create({ + "A(x, y) { x + y; }", + "B(x, y) x + 3 + y", + }); + auto *A = Lex.id("A"); + auto AArgs = lexArgs({"a1 * a2", "a3 * a4"}); + auto Result = Macros->expand(A, AArgs); + EXPECT_EQ(11U, Result.size()) << text(Result) << " / " << Result; + EXPECT_EQ("{a1*a2+a3*a4;}", text(Result)); + std::vector Attributes = { + {tok::l_brace, MR_Hidden, true, 0, {A}}, + {tok::identifier, MR_ExpandedArg, false, 0, {A}}, + {tok::star, MR_ExpandedArg, false, 0, {A}}, + {tok::identifier, MR_ExpandedArg, false, 0, {A}}, + {tok::plus, MR_Hidden, false, 0, {A}}, + {tok::identifier, MR_ExpandedArg, false, 0, {A}}, + {tok::star, MR_ExpandedArg, false, 0, {A}}, + {tok::identifier, MR_ExpandedArg, false, 0, {A}}, + {tok::semi, MR_Hidden, false, 0, {A}}, + {tok::r_brace, MR_Hidden, false, 1, {A}}, + {tok::eof, MR_Hidden, false, 0, {A}}, + }; + expectAttributes(Result, Attributes); + + auto *B = Lex.id("B"); + auto BArgs = lexArgs({"b1", "b2"}); + Result = Macros->expand(B, BArgs); + EXPECT_EQ(6U, Result.size()) << text(Result) << " / " << Result; + EXPECT_EQ("b1+3+b2", text(Result)); + Attributes = { + {tok::identifier, MR_ExpandedArg, true, 0, {B}}, + {tok::plus, MR_Hidden, false, 0, {B}}, + {tok::numeric_constant, MR_Hidden, false, 0, {B}}, + {tok::plus, MR_Hidden, false, 0, {B}}, + {tok::identifier, MR_ExpandedArg, false, 1, {B}}, + {tok::eof, MR_Hidden, false, 0, {B}}, + }; + expectAttributes(Result, Attributes); +} + +TEST_F(MacroExpanderTest, RecursiveExpansion) { + auto Macros = create({ + "A(x) x", + "B(x) x", + "C(x) x", + }); + + auto *A = Lex.id("A"); + auto *B = Lex.id("B"); + auto *C = Lex.id("C"); + + auto Args = lexArgs({"id"}); + auto CResult = uneof(Macros->expand(C, Args)); + auto BResult = 
uneof(Macros->expand(B, CResult)); + auto AResult = uneof(Macros->expand(A, BResult)); + + std::vector Attributes = { + {tok::identifier, MR_ExpandedArg, true, 3, {C, B, A}}, + }; + expectAttributes(AResult, Attributes); +} + +} // namespace +} // namespace format +} // namespace clang diff --git a/clang/unittests/Format/TestLexer.h b/clang/unittests/Format/TestLexer.h new file mode 100644 --- /dev/null +++ b/clang/unittests/Format/TestLexer.h @@ -0,0 +1,88 @@ +//===--- TestLexer.h - Format C++ code --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains a TestLexer to create FormatTokens from strings. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_UNITTESTS_FORMAT_TEST_LEXER_H +#define LLVM_CLANG_UNITTESTS_FORMAT_TEST_LEXER_H + +#include "../../lib/Format/FormatTokenLexer.h" + +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" + +#include +#include + +namespace clang { +namespace format { + +typedef llvm::SmallVector TokenList; + +inline std::ostream &operator<<(std::ostream &Stream, const FormatToken &Tok) { + Stream << "(" << Tok.Tok.getName() << ", \"" << Tok.TokenText.str() << "\")"; + return Stream; +} +inline std::ostream &operator<<(std::ostream &Stream, const TokenList &Tokens) { + Stream << "{"; + for (size_t I = 0, E = Tokens.size(); I != E; ++I) { + Stream << (I > 0 ? 
", " : "") << *Tokens[I]; + } + Stream << "}"; + return Stream; +} + +inline TokenList uneof(const TokenList &Tokens) { + assert(!Tokens.empty() && Tokens.back()->is(tok::eof)); + return TokenList(Tokens.begin(), std::prev(Tokens.end())); +} + +inline std::string text(llvm::ArrayRef Tokens) { + return std::accumulate(Tokens.begin(), Tokens.end(), std::string(), + [](const std::string &R, FormatToken *Tok) { + return (R + Tok->TokenText).str(); + }); +} + +class TestLexer { +public: + TestLexer() : SourceMgr("test.cpp", "") {} + + TokenList lex(llvm::StringRef Code) { + Buffers.push_back( + llvm::MemoryBuffer::getMemBufferCopy(Code, "")); + clang::FileID FID = SourceMgr.get().createFileID(SourceManager::Unowned, + Buffers.back().get()); + FormatTokenLexer Lex(SourceMgr.get(), FID, 0, Style, Encoding, Allocator, + IdentTable); + auto Result = Lex.lex(); + return TokenList(Result.begin(), Result.end()); + } + + FormatToken *id(llvm::StringRef Code) { + auto Result = uneof(lex(Code)); + assert(Result.size() == 1U && "Code must expand to 1 token."); + return Result[0]; + } + + FormatStyle Style = getLLVMStyle(); + encoding::Encoding Encoding = encoding::Encoding_UTF8; + std::vector> Buffers; + clang::SourceManagerForFile SourceMgr; + llvm::SpecificBumpPtrAllocator Allocator; + IdentifierTable IdentTable; +}; + +} // namespace format +} // namespace clang + +#endif // LLVM_CLANG_UNITTESTS_FORMAT_TEST_LEXER_H