diff --git a/clang/include/clang/Lex/MacroInfo.h b/clang/include/clang/Lex/MacroInfo.h
--- a/clang/include/clang/Lex/MacroInfo.h
+++ b/clang/include/clang/Lex/MacroInfo.h
@@ -54,11 +54,14 @@
   /// macro, this includes the \c __VA_ARGS__ identifier on the list.
   IdentifierInfo **ParameterList = nullptr;
 
+  /// This is the list of tokens that the macro is defined to.
+  const Token *ReplacementTokens = nullptr;
+
   /// \see ParameterList
   unsigned NumParameters = 0;
 
-  /// This is the list of tokens that the macro is defined to.
-  SmallVector<Token, 8> ReplacementTokens;
+  /// \see ReplacementTokens
+  unsigned NumReplacementTokens = 0;
 
   /// Length in characters of the macro definition.
   mutable unsigned DefinitionLength;
@@ -230,26 +233,54 @@
   bool isWarnIfUnused() const { return IsWarnIfUnused; }
 
   /// Return the number of tokens that this macro expands to.
-  unsigned getNumTokens() const { return ReplacementTokens.size(); }
+  unsigned getNumTokens() const { return NumReplacementTokens; }
 
   const Token &getReplacementToken(unsigned Tok) const {
-    assert(Tok < ReplacementTokens.size() && "Invalid token #");
+    assert(Tok < NumReplacementTokens && "Invalid token #");
     return ReplacementTokens[Tok];
   }
 
-  using tokens_iterator = SmallVectorImpl<Token>::const_iterator;
+  using const_tokens_iterator = const Token *;
 
-  tokens_iterator tokens_begin() const { return ReplacementTokens.begin(); }
-  tokens_iterator tokens_end() const { return ReplacementTokens.end(); }
-  bool tokens_empty() const { return ReplacementTokens.empty(); }
-  ArrayRef<Token> tokens() const { return ReplacementTokens; }
+  const_tokens_iterator tokens_begin() const { return ReplacementTokens; }
+  const_tokens_iterator tokens_end() const {
+    return ReplacementTokens + NumReplacementTokens;
+  }
+  bool tokens_empty() const { return NumReplacementTokens == 0; }
+  ArrayRef<Token> tokens() const {
+    return llvm::makeArrayRef(ReplacementTokens, NumReplacementTokens);
+  }
 
-  /// Add the specified token to the replacement text for the macro.
-  void AddTokenToBody(const Token &Tok) {
+  /// Allocate storage for \p NumTokens replacement tokens from
+  /// \p PPAllocator and return a mutable view of it for the caller to fill
+  /// in. Asserts that no replacement token list has been set yet.
+  llvm::MutableArrayRef<Token>
+  allocateTokens(unsigned NumTokens, llvm::BumpPtrAllocator &PPAllocator) {
+    assert(ReplacementTokens == nullptr && NumReplacementTokens == 0 &&
+           "Token list already allocated!");
+    NumReplacementTokens = NumTokens;
+    Token *NewReplacementTokens = PPAllocator.Allocate<Token>(NumTokens);
+    ReplacementTokens = NewReplacementTokens;
+    return llvm::makeMutableArrayRef(NewReplacementTokens, NumTokens);
+  }
+
+  /// Set the replacement token list to a copy of \p Tokens stored in memory
+  /// obtained from \p PPAllocator. An empty \p Tokens leaves the macro body
+  /// empty. Asserts that the definition length is not cached yet and that no
+  /// replacement token list has been set.
+  void setTokens(ArrayRef<Token> Tokens, llvm::BumpPtrAllocator &PPAllocator) {
     assert(
         !IsDefinitionLengthCached &&
         "Changing replacement tokens after definition length got calculated");
-    ReplacementTokens.push_back(Tok);
+    assert(ReplacementTokens == nullptr && NumReplacementTokens == 0 &&
+           "Token list already set!");
+    if (Tokens.empty())
+      return;
+
+    NumReplacementTokens = Tokens.size();
+    Token *NewReplacementTokens = PPAllocator.Allocate<Token>(Tokens.size());
+    std::copy(Tokens.begin(), Tokens.end(), NewReplacementTokens);
+    ReplacementTokens = NewReplacementTokens;
   }
 
   /// Return true if this macro is enabled.
diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp
--- a/clang/lib/Lex/MacroInfo.cpp
+++ b/clang/lib/Lex/MacroInfo.cpp
@@ -28,6 +28,25 @@
 
 using namespace clang;
 
+namespace {
+
+// MacroInfo is expected to take 40 bytes on platforms with an 8 byte pointer
+// and 4 byte SourceLocation.
+template <int> class MacroInfoSizeChecker {
+public:
+  constexpr static bool AsExpected = true;
+};
+template <> class MacroInfoSizeChecker<8> {
+public:
+  constexpr static bool AsExpected =
+      sizeof(MacroInfo) == (32 + sizeof(SourceLocation) * 2);
+};
+
+static_assert(MacroInfoSizeChecker<sizeof(void *)>::AsExpected,
+              "Unexpected size of MacroInfo");
+
+} // end namespace
+
 MacroInfo::MacroInfo(SourceLocation DefLoc)
     : Location(DefLoc), IsDefinitionLengthCached(false), IsFunctionLike(false),
       IsC99Varargs(false), IsGNUVarargs(false), IsBuiltinMacro(false),
@@ -39,6 +58,7 @@
   assert(!IsDefinitionLengthCached);
   IsDefinitionLengthCached = true;
 
+  ArrayRef<Token> ReplacementTokens = tokens();
   if (ReplacementTokens.empty())
     return (DefinitionLength = 0);
 
@@ -76,7 +96,7 @@
   bool Lexically = !Syntactically;
 
   // Check # tokens in replacement, number of args, and various flags all match.
-  if (ReplacementTokens.size() != Other.ReplacementTokens.size() ||
+  if (getNumTokens() != Other.getNumTokens() ||
       getNumParams() != Other.getNumParams() ||
       isFunctionLike() != Other.isFunctionLike() ||
       isC99Varargs() != Other.isC99Varargs() ||
@@ -92,7 +112,7 @@
   }
 
   // Check all the tokens.
-  for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) {
+  for (unsigned i = 0; i != NumReplacementTokens; ++i) {
     const Token &A = ReplacementTokens[i];
     const Token &B = Other.ReplacementTokens[i];
     if (A.getKind() != B.getKind())
@@ -157,7 +177,7 @@
   }
 
   bool First = true;
-  for (const Token &Tok : ReplacementTokens) {
+  for (const Token &Tok : tokens()) {
     // Leading space is semantically meaningful in a macro definition,
     // so preserve it in the dump output.
     if (First || Tok.hasLeadingSpace())
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -2711,12 +2711,14 @@
   if (!Tok.is(tok::eod))
     LastTok = Tok;
 
+  SmallVector<Token, 16> Tokens;
+
   // Read the rest of the macro body.
   if (MI->isObjectLike()) {
     // Object-like macros are very simple, just read their body.
     while (Tok.isNot(tok::eod)) {
       LastTok = Tok;
-      MI->AddTokenToBody(Tok);
+      Tokens.push_back(Tok);
       // Get the next token of the macro.
       LexUnexpandedToken(Tok);
     }
@@ -2731,7 +2733,7 @@
       LastTok = Tok;
 
       if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
-        MI->AddTokenToBody(Tok);
+        Tokens.push_back(Tok);
 
         if (VAOCtx.isVAOptToken(Tok)) {
           // If we're already within a VAOPT, emit an error.
@@ -2745,7 +2747,7 @@
             Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
             return nullptr;
           }
-          MI->AddTokenToBody(Tok);
+          Tokens.push_back(Tok);
           VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
           LexUnexpandedToken(Tok);
           if (Tok.is(tok::hashhash)) {
@@ -2756,10 +2758,10 @@
         } else if (VAOCtx.isInVAOpt()) {
           if (Tok.is(tok::r_paren)) {
             if (VAOCtx.sawClosingParen()) {
-              const unsigned NumTokens = MI->getNumTokens();
-              assert(NumTokens >= 3 && "Must have seen at least __VA_OPT__( "
-                                       "and a subsequent tok::r_paren");
-              if (MI->getReplacementToken(NumTokens - 2).is(tok::hashhash)) {
+              assert(Tokens.size() >= 3 &&
+                     "Must have seen at least __VA_OPT__( "
+                     "and a subsequent tok::r_paren");
+              if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
                 Diag(Tok, diag::err_vaopt_paste_at_end);
                 return nullptr;
               }
@@ -2778,7 +2780,7 @@
       // things.
       if (getLangOpts().TraditionalCPP) {
         Tok.setKind(tok::unknown);
-        MI->AddTokenToBody(Tok);
+        Tokens.push_back(Tok);
 
         // Get the next token of the macro.
         LexUnexpandedToken(Tok);
@@ -2794,17 +2796,16 @@
         LexUnexpandedToken(Tok);
 
         if (Tok.is(tok::eod)) {
-          MI->AddTokenToBody(LastTok);
+          Tokens.push_back(LastTok);
           break;
         }
 
-        unsigned NumTokens = MI->getNumTokens();
-        if (NumTokens && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
-            MI->getReplacementToken(NumTokens-1).is(tok::comma))
+        if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
+            Tokens[Tokens.size() - 1].is(tok::comma))
           MI->setHasCommaPasting();
 
         // Things look ok, add the '##' token to the macro.
-        MI->AddTokenToBody(LastTok);
+        Tokens.push_back(LastTok);
         continue;
       }
 
@@ -2823,7 +2824,7 @@
         // confused.
         if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
           LastTok.setKind(tok::unknown);
-          MI->AddTokenToBody(LastTok);
+          Tokens.push_back(LastTok);
           continue;
         } else {
           Diag(Tok, diag::err_pp_stringize_not_parameter)
@@ -2833,13 +2834,13 @@
         }
       }
       // Things look ok, add the '#' and param name tokens to the macro.
-      MI->AddTokenToBody(LastTok);
+      Tokens.push_back(LastTok);
 
       // If the token following '#' is VAOPT, let the next iteration handle it
       // and check it for correctness, otherwise add the token and prime the
       // loop with the next one.
       if (!VAOCtx.isVAOptToken(Tok)) {
-        MI->AddTokenToBody(Tok);
+        Tokens.push_back(Tok);
         LastTok = Tok;
 
         // Get the next token of the macro.
@@ -2855,6 +2856,8 @@
     }
   }
   MI->setDefinitionEndLoc(LastTok.getLocation());
+
+  MI->setTokens(Tokens, BP);
   return MI;
 }
 /// HandleDefineDirective - Implements \#define. This consumes the entire macro
@@ -3007,7 +3010,7 @@
     Tok.startToken();
     Tok.setKind(tok::kw__Static_assert);
     Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
-    MI->AddTokenToBody(Tok);
+    MI->setTokens({Tok}, BP);
     (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
   }
 }
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1694,6 +1694,7 @@
   RecordData Record;
   SmallVector<IdentifierInfo*, 16> MacroParams;
   MacroInfo *Macro = nullptr;
+  llvm::MutableArrayRef<Token> MacroTokens;
 
   while (true) {
     // Advance to the next record, but if we get to the end of the block, don't
@@ -1748,7 +1749,8 @@
       MI->setDefinitionEndLoc(ReadSourceLocation(F, Record, NextIndex));
       MI->setIsUsed(Record[NextIndex++]);
       MI->setUsedForHeaderGuard(Record[NextIndex++]);
-
+      MacroTokens = MI->allocateTokens(Record[NextIndex++],
+                                       PP.getPreprocessorAllocator());
       if (RecType == PP_MACRO_FUNCTION_LIKE) {
         // Decode function-like macro info.
         bool isC99VarArgs = Record[NextIndex++];
@@ -1793,10 +1795,14 @@
       // If we see a TOKEN before a PP_MACRO_*, then the file is
       // erroneous, just pretend we didn't see this.
       if (!Macro) break;
+      if (MacroTokens.empty()) {
+        Error("unexpected number of macro tokens for a macro in AST file");
+        return Macro;
+      }
 
       unsigned Idx = 0;
-      Token Tok = ReadToken(F, Record, Idx);
-      Macro->AddTokenToBody(Tok);
+      MacroTokens[0] = ReadToken(F, Record, Idx);
+      MacroTokens = MacroTokens.drop_front();
       break;
     }
     }
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -2431,6 +2431,7 @@
       AddSourceLocation(MI->getDefinitionEndLoc(), Record);
       Record.push_back(MI->isUsed());
       Record.push_back(MI->isUsedForHeaderGuard());
+      Record.push_back(MI->getNumTokens());
       unsigned Code;
       if (MI->isObjectLike()) {
         Code = PP_MACRO_OBJECT_LIKE;
diff --git a/clang/unittests/Lex/CMakeLists.txt b/clang/unittests/Lex/CMakeLists.txt
--- a/clang/unittests/Lex/CMakeLists.txt
+++ b/clang/unittests/Lex/CMakeLists.txt
@@ -9,6 +9,7 @@
   LexerTest.cpp
   PPCallbacksTest.cpp
   PPConditionalDirectiveRecordTest.cpp
+  PPMemoryAllocationsTest.cpp
   )
 
 clang_target_link_libraries(LexTests
diff --git a/clang/unittests/Lex/PPMemoryAllocationsTest.cpp b/clang/unittests/Lex/PPMemoryAllocationsTest.cpp
new file mode 100644
--- /dev/null
+++ b/clang/unittests/Lex/PPMemoryAllocationsTest.cpp
@@ -0,0 +1,97 @@
+//===- unittests/Lex/PPMemoryAllocationsTest.cpp - ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--------------------------------------------------------------===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+
+namespace {
+
+class PPMemoryAllocationsTest : public ::testing::Test {
+protected:
+  PPMemoryAllocationsTest()
+      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
+        Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
+        SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
+    TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
+    Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
+  }
+
+  FileSystemOptions FileMgrOpts;
+  FileManager FileMgr;
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
+  DiagnosticsEngine Diags;
+  SourceManager SourceMgr;
+  LangOptions LangOpts;
+  std::shared_ptr<TargetOptions> TargetOpts;
+  IntrusiveRefCntPtr<TargetInfo> Target;
+};
+
+TEST_F(PPMemoryAllocationsTest, PPMacroDefinesAllocations) {
+  std::string Source;
+  size_t NumMacros = 1000000;
+  {
+    llvm::raw_string_ostream SourceOS(Source);
+
+    // Create a combination of 1 or 3 token macros.
+    for (size_t I = 0; I < NumMacros; ++I) {
+      SourceOS << "#define MACRO_ID_" << I << " ";
+      if ((I % 2) == 0)
+        SourceOS << "(" << I << ")";
+      else
+        SourceOS << I;
+      SourceOS << "\n";
+    }
+  }
+
+  std::unique_ptr<llvm::MemoryBuffer> Buf =
+      llvm::MemoryBuffer::getMemBuffer(Source);
+  SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
+
+  TrivialModuleLoader ModLoader;
+  HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
+                          Diags, LangOpts, Target.get());
+  Preprocessor PP(std::make_shared<PreprocessorOptions>(), Diags, LangOpts,
+                  SourceMgr, HeaderInfo, ModLoader,
+                  /*IILookup =*/nullptr,
+                  /*OwnsHeaderSearch =*/false);
+  PP.Initialize(*Target);
+  PP.EnterMainSourceFile();
+
+  while (1) {
+    Token tok;
+    PP.Lex(tok);
+    if (tok.is(tok::eof))
+      break;
+  }
+
+  size_t NumAllocated = PP.getPreprocessorAllocator().getBytesAllocated();
+  float BytesPerDefine = float(NumAllocated) / float(NumMacros);
+  llvm::errs() << "Num preprocessor allocations for " << NumMacros
+               << " #define: " << NumAllocated << "\n";
+  llvm::errs() << "Bytes per #define: " << BytesPerDefine << "\n";
+  // On arm64-apple-macos, we get around 120 bytes per define.
+  // Assume a reasonable upper bound based on that number that we don't want
+  // to exceed when storing information about a macro #define with 1 or 3
+  // tokens.
+  EXPECT_LT(BytesPerDefine, 130.0f);
+}
+
+} // anonymous namespace