diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -197,18 +197,20 @@ /// token range R. llvm::ArrayRef expandedTokens(SourceRange R) const; - /// Find the subrange of spelled tokens that produced the corresponding \p - /// Expanded tokens. + /// Returns the subrange of spelled tokens corresponding to AST node spanning + /// \p Expanded. This is the text that should be replaced if a refactoring + /// were to rewrite the node. If \p Expanded is empty, the returned value is + /// llvm::None. /// - /// EXPECTS: \p Expanded is a subrange of expandedTokens(). - /// - /// Will fail if the expanded tokens do not correspond to a - /// sequence of spelled tokens. E.g. for the following example: + /// Will fail if the expanded tokens do not correspond to a sequence of + /// spelled tokens. E.g. for the following example: /// /// #define FIRST f1 f2 f3 /// #define SECOND s1 s2 s3 + /// #define ID2(X, Y) X Y /// /// a FIRST b SECOND c // expanded tokens are: a f1 f2 f3 b s1 s2 s3 c + /// d ID2(e f g, h) i // expanded tokens are: d e f g h i /// /// the results would be: /// expanded => spelled @@ -218,8 +220,10 @@ /// a f1 f2 f3 => a FIRST /// a f1 => can't map /// s1 s2 => can't map + /// e f => e f + /// g h => can't map /// - /// If \p Expanded is empty, the returned value is llvm::None. + /// EXPECTS: \p Expanded is a subrange of expandedTokens(). /// Complexity is logarithmic. llvm::Optional> spelledForExpanded(llvm::ArrayRef Expanded) const; diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -35,6 +35,69 @@ using namespace clang; using namespace clang::syntax; +namespace { +// Finds the smallest consecutive subsuquence of Toks that covers R. +llvm::ArrayRef +getTokensCovering(llvm::ArrayRef Toks, SourceRange R, + const SourceManager &SM) { + if (R.isInvalid()) + return {}; + const syntax::Token *Begin = + llvm::partition_point(Toks, [&](const syntax::Token &T) { + return SM.isBeforeInTranslationUnit(T.location(), R.getBegin()); + }); + const syntax::Token *End = + llvm::partition_point(Toks, [&](const syntax::Token &T) { + return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location()); + }); + if (Begin > End) + return {}; + return {Begin, End}; +} + +// Finds the smallest expansion range that contains expanded tokens First and +// Last, e.g.: +// #define ID(x) x +// ID(ID(ID(a1) a2)) +// ~~ -> a1 +// ~~ -> a2 +// ~~~~~~~~~ -> a1 a2 +SourceRange findCommonRangeForMacroArgs(const syntax::Token &First, + const syntax::Token &Last, + const SourceManager &SM) { + SourceRange Res; + auto FirstLoc = First.location(), LastLoc = Last.location(); + // Keep traversing up the spelling chain as longs as tokens are part of the + // same expansion. + while (!FirstLoc.isFileID() && !LastLoc.isFileID()) { + auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion(); + auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion(); + // Stop if expansions have diverged. + if (ExpInfoFirst.getExpansionLocStart() != + ExpInfoLast.getExpansionLocStart()) + break; + // Do not continue into macro bodies. + if (!ExpInfoFirst.isMacroArgExpansion() || + !ExpInfoLast.isMacroArgExpansion()) + break; + FirstLoc = SM.getImmediateSpellingLoc(FirstLoc); + LastLoc = SM.getImmediateSpellingLoc(LastLoc); + // Update the result afterwards, as we want the tokens that triggered the + // expansion. + Res = {FirstLoc, LastLoc}; + } + // Normally mapping back to expansion location here only changes FileID, as + // we've already found some tokens expanded from the same macro argument, and + // they should map to a consecutive subset of spelled tokens. Unfortunately + // SourceManager::isBeforeInTranslationUnit discriminates sourcelocations + // based on their FileID in addition to offsets. So even though we are + // referring to same tokens, SourceManager might tell us that one is before + // the other if they've got different FileIDs. + return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange(); +} + +} // namespace + syntax::Token::Token(SourceLocation Location, unsigned Length, tok::TokenKind Kind) : Location(Location), Length(Length), Kind(Kind) { @@ -121,19 +184,7 @@ } llvm::ArrayRef TokenBuffer::expandedTokens(SourceRange R) const { - if (R.isInvalid()) - return {}; - const Token *Begin = - llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { - return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin()); - }); - const Token *End = - llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { - return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location()); - }); - if (Begin > End) - return {}; - return {Begin, End}; + return getTokensCovering(expandedTokens(), R, *SourceMgr); } CharSourceRange FileRange::toCharRange(const SourceManager &SM) const { @@ -206,8 +257,6 @@ if (Expanded.empty()) return llvm::None; - // FIXME: also allow changes uniquely mapping to macro arguments. - const syntax::Token *BeginSpelled; const Mapping *BeginMapping; std::tie(BeginSpelled, BeginMapping) = @@ -225,12 +274,28 @@ const MarkedFile &File = Files.find(FID)->second; - // Do not allow changes that cross macro expansion boundaries. + // If both tokens are coming from a macro argument expansion, try and map to + // smallest part of the macro argument. BeginMapping && LastMapping check is + // only for performance, they are a prerequisite for Expanded.front() and + // Expanded.back() being part of a macro arg expansion. + if (BeginMapping && LastMapping && + SourceMgr->isMacroArgExpansion(Expanded.front().location()) && + SourceMgr->isMacroArgExpansion(Expanded.back().location())) { + auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(), + Expanded.back(), *SourceMgr); + // It might be the case that tokens are arguments of different macro calls, + // in that case we should continue with the logic below instead of returning + // an empty range. + if (CommonRange.isValid()) + return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr); + } + + // Do not allow changes that doesn't cover full expansion. unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data(); unsigned EndExpanded = Expanded.end() - ExpandedTokens.data(); - if (BeginMapping && BeginMapping->BeginExpanded < BeginExpanded) + if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded) return llvm::None; - if (LastMapping && EndExpanded < LastMapping->EndExpanded) + if (LastMapping && LastMapping->EndExpanded != EndExpanded) return llvm::None; // All is good, return the result. return llvm::makeArrayRef( diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -627,6 +627,7 @@ A split B )cpp"); + // Ranges going across expansion boundaries. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), ValueIs(SameRange(findSpelled("A split B")))); EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), @@ -647,22 +648,28 @@ ID(ID(ID(a1) a2 a3)) split ID(B) )cpp"); - EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), - ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")))); EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), - ValueIs(SameRange(findSpelled("ID ( B )")))); + ValueIs(SameRange(findSpelled("( B").drop_front()))); EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")), ValueIs(SameRange(findSpelled( "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")))); - // Ranges crossing macro call boundaries. - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")), - llvm::None); - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")), - llvm::None); - // FIXME: next two examples should map to macro arguments, but currently they - // fail. - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None); - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); + // Mixed ranges with expanded and spelled tokens. + EXPECT_THAT( + Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")), + ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split")))); + EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")), + ValueIs(SameRange(findSpelled("split ID ( B )")))); + // Macro arguments + EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")), + ValueIs(SameRange(findSpelled("a1")))); + EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")), + ValueIs(SameRange(findSpelled("a2")))); + EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")), + ValueIs(SameRange(findSpelled("a3")))); + EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")), + ValueIs(SameRange(findSpelled("ID ( a1 ) a2")))); + EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), + ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3")))); // Empty macro expansions. recordTokens(R"cpp( @@ -674,11 +681,11 @@ ID(7 8 9) EMPTY EMPTY )cpp"); EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")), - ValueIs(SameRange(findSpelled("ID ( 1 2 3 )")))); + ValueIs(SameRange(findSpelled("1 2 3")))); EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")), - ValueIs(SameRange(findSpelled("ID ( 4 5 6 )")))); + ValueIs(SameRange(findSpelled("4 5 6")))); EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")), - ValueIs(SameRange(findSpelled("ID ( 7 8 9 )")))); + ValueIs(SameRange(findSpelled("7 8 9")))); // Empty mappings coming from various directives. recordTokens(R"cpp( @@ -689,6 +696,27 @@ )cpp"); EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")), ValueIs(SameRange(findSpelled("not_mapped")))); + + // Multiple macro arguments + recordTokens(R"cpp( + #define ID(X) X + #define ID2(X, Y) X Y + + ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7) + )cpp"); + // Should fail, spans multiple arguments. + EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); + EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")), + ValueIs(SameRange(findSpelled("ID ( a2 ) a3")))); + EXPECT_THAT( + Buffer.spelledForExpanded(findExpanded("a1 a2 a3")), + ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )")))); + EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")), + ValueIs(SameRange(findSpelled("a5 a6")))); + EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")), + ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )")))); + // Should fail, spans multiple invocations. + EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")), llvm::None); } TEST_F(TokenBufferTest, ExpandedTokensForRange) {