diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -218,6 +218,35 @@ llvm::Optional> spelledForExpanded(llvm::ArrayRef Expanded) const; + /// Find the subranges of expanded tokens, corresponding to \p Spelled. + /// Some spelled tokens may not be present in the expanded token stream, so + /// this function can return an empty vector, e.g. for tokens of macro + /// directives or disabled preprocessor branches. + /// + /// Some spelled tokens can be duplicated in the expanded token stream + /// multiple times and this function will return multiple results in those + /// cases. This happens when \p Spelled is inside a macro argument. + /// + /// (!) will return empty vector on tokens from #define body: + /// E.g. for the following example: + /// + /// #define FIRST(A) f1 A = A f2 + /// #define SECOND s + /// + /// a FIRST(arg) b SECOND c // expanded tokens are: a f1 arg = arg f2 b s + /// The results would be + /// spelled => expanded + /// ------------------------ + /// #define FIRST => {} + /// a FIRST(arg) => {a f1 arg = arg f2} + /// arg => {arg, arg} // arg #1 is before `=` and arg #2 is + /// // after `=` in the expanded tokens. + /// FIXME: return correct results on macro arguments. For now, we return an + /// empty list. + /// EXPECTS: !Spelled.empty() + llvm::SmallVector, 1> + expandedForSpelled(llvm::ArrayRef Spelled) const; + /// An expansion produced by the preprocessor, includes macro expansions and /// preprocessor directives. Preprocessor always maps a non-empty range of /// spelled tokens to a (possibly empty) range of expanded tokens. Here is a @@ -303,6 +332,10 @@ std::pair spelledForExpandedToken(const syntax::Token *Expanded) const; + static const Mapping * + mappingStartingBeforeSpelled(const MarkedFile &F, + const syntax::Token *Spelled); + /// Token stream produced after preprocessing, conceputally this captures the /// same stream as 'clang -E' (excluding the preprocessor directives like /// #file, etc.). diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -67,7 +67,8 @@ auto F = First.range(SM); auto L = Last.range(SM); assert(F.file() == L.file() && "tokens from different files"); - assert((F == L || F.endOffset() <= L.beginOffset()) && "wrong order of tokens"); + assert((F == L || F.endOffset() <= L.beginOffset()) && + "wrong order of tokens"); return FileRange(F.file(), F.beginOffset(), L.endOffset()); } @@ -176,6 +177,72 @@ /*Mapping*/ nullptr}; } +const TokenBuffer::Mapping * +TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F, + const syntax::Token *Spelled) { + assert(F.SpelledTokens.data() <= Spelled); + unsigned SpelledI = Spelled - F.SpelledTokens.data(); + assert(SpelledI < F.SpelledTokens.size()); + + auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) { + return M.BeginSpelled <= SpelledI; + }); + if (It == F.Mappings.begin()) + return nullptr; + --It; + return &*It; +} + +llvm::SmallVector, 1> +TokenBuffer::expandedForSpelled(llvm::ArrayRef Spelled) const { + assert(!Spelled.empty()); + assert(Spelled.front().location().isFileID()); + + auto FID = sourceManager().getFileID(Spelled.front().location()); + auto It = Files.find(FID); + assert(It != Files.end()); + + auto &File = It->second; + assert(File.SpelledTokens.data() <= Spelled.data()); + assert(Spelled.size() <= File.SpelledTokens.size()); + + auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front()); + unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data(); + unsigned ExpandedBegin; + if (!FrontMapping) { + ExpandedBegin = File.BeginExpanded + SpelledFrontI; + } else if (SpelledFrontI < FrontMapping->EndSpelled) { + if (SpelledFrontI != FrontMapping->BeginSpelled) + return {}; // FIXME: support macro arguments. + ExpandedBegin = FrontMapping->BeginExpanded; + } else { + ExpandedBegin = + FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled); + } + + auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back()); + unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data(); + unsigned ExpandedEnd; + if (!BackMapping) { + ExpandedEnd = File.BeginExpanded + SpelledBackI + 1; + } else if (SpelledBackI < BackMapping->EndSpelled) { + if (SpelledBackI + 1 != BackMapping->EndSpelled) + return {}; // FIXME: support macro arguments. + ExpandedEnd = BackMapping->EndExpanded; + } else { + ExpandedEnd = + BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1; + } + + assert(ExpandedBegin < ExpandedTokens.size()); + assert(ExpandedEnd < ExpandedTokens.size()); + // Avoid returning empty ranges. + if (ExpandedBegin == ExpandedEnd) + return {}; + return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin, + ExpandedTokens.data() + ExpandedEnd)}; +} + llvm::ArrayRef TokenBuffer::spelledTokens(FileID FID) const { auto It = Files.find(FID); assert(It != Files.end()); diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -57,6 +57,7 @@ using ::testing::Contains; using ::testing::ElementsAre; using ::testing::Field; +using ::testing::IsEmpty; using ::testing::Matcher; using ::testing::Not; using ::testing::StartsWith; @@ -178,10 +179,14 @@ template llvm::ArrayRef findSubrange(llvm::ArrayRef Subrange, llvm::ArrayRef Range, Eq F) { - for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) { + assert(Subrange.size() >= 1); + if (Range.size() < Subrange.size()) + return llvm::makeArrayRef(Range.end(), Range.end()); + for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size(); + Begin <= Last; ++Begin) { auto It = Begin; - for (auto ItSub = Subrange.begin(); - ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) { + for (auto ItSub = Subrange.begin(); ItSub != Subrange.end(); + ++ItSub, ++It) { if (!F(*ItSub, *It)) goto continue_outer; } @@ -664,6 +669,101 @@ ValueIs(SameRange(findSpelled("not_mapped")))); } +TEST_F(TokenBufferTest, ExpandedBySpelled) { + recordTokens(R"cpp( + a1 a2 a3 b1 b2 + )cpp"); + // Sanity check: expanded and spelled tokens are stored separately. + EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); + // Searching for subranges of expanded tokens should give the corresponding + // spelled ones. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")), + ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")), + ElementsAre(SameRange(findExpanded("a1 a2 a3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")), + ElementsAre(SameRange(findExpanded("b1 b2")))); + + // Test search on simple macro expansions. + recordTokens(R"cpp( + #define A a1 a2 a3 + #define B b1 b2 + + A split B + )cpp"); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")), + ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()), + ElementsAre(SameRange(findExpanded("a1 a2 a3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()), + ElementsAre(SameRange(findExpanded("b1 b2")))); + + // Recursive macro invocations. + recordTokens(R"cpp( + #define ID(x) x + #define B b1 b2 + + ID(ID(ID(a1) a2 a3)) split ID(B) + )cpp"); + + EXPECT_THAT( + Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")), + ElementsAre(SameRange(findExpanded("a1 a2 a3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")), + ElementsAre(SameRange(findExpanded("b1 b2")))); + EXPECT_THAT(Buffer.expandedForSpelled( + findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")), + ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); + // FIXME: these should succeed, but we do not support macro arguments yet. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty()); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")), + IsEmpty()); + + // Empty macro expansions. + recordTokens(R"cpp( + #define EMPTY + #define ID(X) X + + EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 + EMPTY EMPTY ID(4 5 6) split2 + ID(7 8 9) EMPTY EMPTY + )cpp"); + // Covered by empty expansions on one of both of the sides. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")), + ElementsAre(SameRange(findExpanded("4 5 6")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")), + ElementsAre(SameRange(findExpanded("7 8 9")))); + // Including the empty macro expansions on the side. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + + // Empty mappings coming from various directives. + recordTokens(R"cpp( + #define ID(X) X + ID(1) + #pragma lalala + not_mapped + )cpp"); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")), + IsEmpty()); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")), + IsEmpty()); + + // Empty macro expansion. + recordTokens(R"cpp( + #define EMPTY + EMPTY int a = 100; + )cpp"); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()), + IsEmpty()); +} + TEST_F(TokenBufferTest, ExpandedTokensForRange) { recordTokens(R"cpp( #define SIGN(X) X##_washere