diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -877,6 +877,14 @@ return CharSourceRange::getCharRange(Begin, End); } +// Assumes that `Loc` is in an expansion. +static bool isInExpansionTokenRange(const SourceLocation Loc, + const SourceManager &SM) { + return SM.getSLocEntry(SM.getFileID(Loc)) + .getExpansion() + .isExpansionTokenRange(); +} + CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts) { @@ -896,10 +904,12 @@ } if (Begin.isFileID() && End.isMacroID()) { - if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts, - &End)) || - (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts, - &End))) + if (Range.isTokenRange()) { + if (!isAtEndOfMacroExpansion(End, SM, LangOpts, &End)) + return {}; + // Use the *original* end, not the expanded one in `End`. + Range.setTokenRange(isInExpansionTokenRange(Range.getEnd(), SM)); + } else if (!isAtStartOfMacroExpansion(End, SM, LangOpts, &End)) return {}; Range.setEnd(End); return makeRangeFromFileLocs(Range, SM, LangOpts); @@ -914,6 +924,9 @@ &MacroEnd)))) { Range.setBegin(MacroBegin); Range.setEnd(MacroEnd); + // Use the *original* `End`, not the expanded one in `MacroEnd`. 
+ if (Range.isTokenRange()) + Range.setTokenRange(isInExpansionTokenRange(End, SM)); return makeRangeFromFileLocs(Range, SM, LangOpts); } diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp --- a/clang/unittests/Lex/LexerTest.cpp +++ b/clang/unittests/Lex/LexerTest.cpp @@ -25,6 +25,7 @@ #include "clang/Lex/PreprocessorOptions.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include <memory> #include <vector> namespace { @@ -65,7 +66,7 @@ std::vector<Token> Lex(StringRef Source) { TrivialModuleLoader ModLoader; - auto PP = CreatePP(Source, ModLoader); + PP = CreatePP(Source, ModLoader); std::vector<Token> toks; while (1) { @@ -109,6 +110,7 @@ LangOptions LangOpts; std::shared_ptr<TargetOptions> TargetOpts; IntrusiveRefCntPtr<TargetInfo> Target; + std::unique_ptr<Preprocessor> PP; }; TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { @@ -264,12 +266,14 @@ TEST_F(LexerTest, LexAPI) { std::vector<tok::TokenKind> ExpectedTokens; + // Line 1 (after the #defines) ExpectedTokens.push_back(tok::l_square); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::r_square); ExpectedTokens.push_back(tok::l_square); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::r_square); + // Line 2 ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); @@ -357,6 +361,65 @@ EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts)); } +TEST_F(LexerTest, HandlesSplitTokens) { + std::vector<tok::TokenKind> ExpectedTokens; + // Line 1 (after the #defines) + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::less); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::less); + ExpectedTokens.push_back(tok::greatergreater); + // Line 2 + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::less); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::less); + ExpectedTokens.push_back(tok::greatergreater); + + std::vector<Token> toks = 
CheckLex("#define TY ty\n" + "#define RANGLE ty<ty<>>\n" + "TY<ty<>>\n" + "RANGLE", + ExpectedTokens); + + SourceLocation outerTyLoc = toks[0].getLocation(); + SourceLocation innerTyLoc = toks[2].getLocation(); + SourceLocation gtgtLoc = toks[4].getLocation(); + // Split the token to simulate the action of the parser and force creation of + // an `ExpansionTokenRange`. + SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1); + + // Verify that it only captures the first greater-than and not the second one. + CharSourceRange range = Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr, + LangOpts); + EXPECT_TRUE(range.isCharRange()); + EXPECT_EQ(range.getAsRange(), + SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1))); + + // Verify case where range begins in a macro expansion. + range = Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr, + LangOpts); + EXPECT_TRUE(range.isCharRange()); + EXPECT_EQ(range.getAsRange(), + SourceRange(SourceMgr.getExpansionLoc(outerTyLoc), + gtgtLoc.getLocWithOffset(1))); + + SourceLocation macroInnerTyLoc = toks[7].getLocation(); + SourceLocation macroGtgtLoc = toks[9].getLocation(); + // Split the token to simulate the action of the parser and force creation of + // an `ExpansionTokenRange`. + SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1); + + // Verify that it fails (because it only captures the first greater-than and + // not the second one, so it doesn't span the entire macro expansion). + range = Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc), + SourceMgr, LangOpts); + EXPECT_TRUE(range.isInvalid()); +} + TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) { std::vector<Token> toks = Lex("#define helper1 0\n"