diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -227,6 +227,8 @@ /// #pragma, etc. llvm::ArrayRef spelledTokens(FileID FID) const; + const SourceManager &sourceManager() const { return *SourceMgr; } + std::string dumpForTests() const; private: @@ -310,9 +312,18 @@ LLVM_NODISCARD TokenBuffer consume() &&; private: + /// Maps from a start location to an end location of each mapping. + /// 1. range from '#' to the last token in the line for PP directives, + /// 2. macro name and arguments for macro expansions. + using SpelledMappings = + llvm::DenseMap; class Builder; + class Callbacks; + std::vector Expanded; + SpelledMappings Mappings; Preprocessor &PP; + Callbacks *CB; }; } // namespace syntax diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -14,6 +14,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" +#include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/Token.h" #include "llvm/ADT/ArrayRef.h" @@ -252,6 +253,33 @@ return Tokens; } +class TokenCollector::Callbacks : public PPCallbacks { +public: + Callbacks(TokenCollector &C) : C(&C) {} + + /// Disabled instance will stop reporting anything to TokenCollector. + void disable() { C = nullptr; } + + void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD, + SourceRange Range, const MacroArgs *Args) override { + if (!C) + return; + // Do not record recursive expansions. + if (!MacroNameTok.getLocation().isFileID() || + (LastExpansionEnd.isValid() && + C->PP.getSourceManager().isBeforeInTranslationUnit(Range.getBegin(), + LastExpansionEnd))) + return; + C->Mappings[Range.getBegin().getRawEncoding()] = Range.getEnd(); + LastExpansionEnd = Range.getEnd(); + } + // FIXME: handle #pragma, #include, etc. +private: + TokenCollector *C; + /// Used to detect recursive macro expansions. + SourceLocation LastExpansionEnd; +}; + /// Fills in the TokenBuffer by tracing the run of a preprocessor. The /// implementation tracks the tokens, macro expansions and directives coming /// from the preprocessor and: @@ -279,15 +307,20 @@ ); Expanded.push_back(syntax::Token(T)); }); + // And locations of macro calls, to properly recover boundaries of those in + // case of empty expansions. + auto CB = llvm::make_unique(*this); + this->CB = CB.get(); + PP.addPPCallbacks(std::move(CB)); } /// Builds mappings and spelled tokens in the TokenBuffer based on the expanded /// token stream. class TokenCollector::Builder { public: - Builder(std::vector Expanded, const SourceManager &SM, - const LangOptions &LangOpts) - : Result(SM), SM(SM), LangOpts(LangOpts) { + Builder(std::vector Expanded, SpelledMappings Mappings, + const SourceManager &SM, const LangOptions &LangOpts) + : Result(SM), Mappings(std::move(Mappings)), SM(SM), LangOpts(LangOpts) { Result.ExpandedTokens = std::move(Expanded); } @@ -296,6 +329,9 @@ // Walk over expanded tokens and spelled tokens in parallel, building the // mappings between those using source locations. + // To correctly recover empty macro expansions, we also take locations + // reported to PPCallbacks::MacroExpands into account as we do not have any + // expanded tokens with source locations to guide us. // The 'eof' token is special, it is not part of spelled token stream. We // handle it separately at the end. @@ -356,22 +392,17 @@ fillGapUntil(File, SpelledRange.getBegin(), I); - TokenBuffer::Mapping M; - // Skip the spelled macro tokens. - std::tie(M.BeginSpelled, M.EndSpelled) = - consumeSpelledUntil(File, SpelledRange.getEnd().getLocWithOffset(1)); // Skip all expanded tokens from the same macro expansion. - M.BeginExpanded = I; + unsigned BeginExpanded = I; for (; I + 1 < Result.ExpandedTokens.size(); ++I) { auto NextL = Result.ExpandedTokens[I + 1].location(); if (!NextL.isMacroID() || SM.getExpansionLoc(NextL) != SpelledRange.getBegin()) break; } - M.EndExpanded = I + 1; - - // Add a resulting mapping. - File.Mappings.push_back(M); + unsigned EndExpanded = I + 1; + consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded, + EndExpanded, NextSpelled[FID]); } /// Initializes TokenBuffer::Files and fills spelled tokens and expanded @@ -393,69 +424,108 @@ } } - /// Consumed spelled tokens until location L is reached (token starting at L - /// is not included). Returns the indicies of the consumed range. - std::pair - consumeSpelledUntil(TokenBuffer::MarkedFile &File, SourceLocation L) { - assert(L.isFileID()); - FileID FID; - unsigned Offset; - std::tie(FID, Offset) = SM.getDecomposedLoc(L); + void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, + unsigned ExpandedIndex, unsigned &SpelledIndex) { + consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex); + } - // (!) we update the index in-place. - unsigned &SpelledI = NextSpelled[FID]; - unsigned Before = SpelledI; - for (; SpelledI < File.SpelledTokens.size() && - SM.getFileOffset(File.SpelledTokens[SpelledI].location()) < Offset; - ++SpelledI) { - } - return std::make_pair(Before, /*After*/ SpelledI); - }; + /// Consumes spelled tokens that form a macro expansion and adds a entry to + /// the resulting token buffer. + /// (!) SpelledIndex is updated in-place. + void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, + unsigned BeginExpanded, unsigned EndExpanded, + unsigned &SpelledIndex) { + // We need to record this mapping before continuing. + unsigned MappingBegin = SpelledIndex; + ++SpelledIndex; + + bool HitMapping = + tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue(); + (void)HitMapping; + assert(!HitMapping && "recursive macro expansion?"); + + TokenBuffer::Mapping M; + M.BeginExpanded = BeginExpanded; + M.EndExpanded = EndExpanded; + M.BeginSpelled = MappingBegin; + M.EndSpelled = SpelledIndex; + + File.Mappings.push_back(M); + } /// Consumes spelled tokens until location \p L is reached and adds a mapping /// covering the consumed tokens. The mapping will point to an empty expanded /// range at position \p ExpandedIndex. void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L, unsigned ExpandedIndex) { - unsigned BeginSpelledGap, EndSpelledGap; - std::tie(BeginSpelledGap, EndSpelledGap) = consumeSpelledUntil(File, L); - if (BeginSpelledGap == EndSpelledGap) - return; // No gap. - TokenBuffer::Mapping M; - M.BeginSpelled = BeginSpelledGap; - M.EndSpelled = EndSpelledGap; - M.BeginExpanded = M.EndExpanded = ExpandedIndex; - File.Mappings.push_back(M); + assert(L.isFileID()); + FileID FID; + unsigned Offset; + std::tie(FID, Offset) = SM.getDecomposedLoc(L); + + unsigned &SpelledIndex = NextSpelled[FID]; + unsigned MappingBegin = SpelledIndex; + while (true) { + auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex); + if (SpelledIndex != MappingBegin) { + TokenBuffer::Mapping M; + M.BeginSpelled = MappingBegin; + M.EndSpelled = SpelledIndex; + M.BeginExpanded = M.EndExpanded = ExpandedIndex; + File.Mappings.push_back(M); + } + if (!EndLoc) + break; + consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex, + SpelledIndex); + + MappingBegin = SpelledIndex; + } }; + /// Consumes spelled tokens until it reaches Offset or a mapping boundary, + /// i.e. a name of a macro expansion or the start '#' token of a PP directive. + /// (!) NextSpelled is updated in place. + /// + /// returns None if \p Offset was reached, otherwise returns the end location + /// of a mapping that starts at \p NextSpelled. + llvm::Optional + tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset, + unsigned &NextSpelled) { + for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) { + auto L = File.SpelledTokens[NextSpelled].location(); + if (Offset <= SM.getFileOffset(L)) + return llvm::None; // reached the offset we are looking for. + auto Mapping = Mappings.find(L.getRawEncoding()); + if (Mapping != Mappings.end()) + return Mapping->second; // found a mapping before the offset. + } + return llvm::None; // no more tokens, we "reached" the offset. + } + /// Adds empty mappings for unconsumed spelled tokens at the end of each file. void fillGapsAtEndOfFiles() { for (auto &F : Result.Files) { - unsigned Next = NextSpelled[F.first]; - if (F.second.SpelledTokens.size() == Next) - continue; // All spelled tokens are accounted for. - - // Record a mapping for the gap at the end of the spelled tokens. - TokenBuffer::Mapping M; - M.BeginSpelled = Next; - M.EndSpelled = F.second.SpelledTokens.size(); - M.BeginExpanded = F.second.EndExpanded; - M.EndExpanded = F.second.EndExpanded; - - F.second.Mappings.push_back(M); + if (F.second.SpelledTokens.empty()) + continue; + fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(), + F.second.EndExpanded); } } TokenBuffer Result; /// For each file, a position of the next spelled token we will consume. llvm::DenseMap NextSpelled; + SpelledMappings Mappings; const SourceManager &SM; const LangOptions &LangOpts; }; TokenBuffer TokenCollector::consume() && { PP.setTokenWatcher(nullptr); - return Builder(std::move(Expanded), PP.getSourceManager(), PP.getLangOpts()) + CB->disable(); + return Builder(std::move(Expanded), std::move(Mappings), + PP.getSourceManager(), PP.getLangOpts()) .build(); } diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -428,7 +428,9 @@ spelled tokens: # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) mappings: - ['#'_0, ''_18) => [''_0, ''_0) + ['#'_0, 'EMPTY'_9) => [''_0, ''_0) + ['EMPTY'_9, 'EMPTY_FUNC'_10) => [''_0, ''_0) + ['EMPTY_FUNC'_10, ''_18) => [''_0, ''_0) )"}, // File ends with a macro replacement. {R"cpp(