diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -228,6 +228,35 @@ llvm::Optional> spelledForExpanded(llvm::ArrayRef Expanded) const; + /// Find the subranges of expanded tokens, corresponding to \p Spelled. + /// Some spelled tokens may not be present in the expanded token stream, so + /// this function can return an empty vector, e.g. for tokens of macro + /// directives or disabled preprocessor branches. + /// + /// Some spelled tokens can be duplicated in the expanded token stream + /// multiple times and this function will return multiple results in those + /// cases. This happens when \p Spelled is inside a macro argument. + /// + /// FIXME: return correct results on macro arguments. For now, we return an + /// empty list. + /// + /// (!) will return empty vector on tokens from #define body: + /// E.g. for the following example: + /// + /// #define FIRST(A) f1 A = A f2 + /// #define SECOND s + /// + /// a FIRST(arg) b SECOND c // expanded tokens are: a f1 arg = arg f2 b s + /// The results would be + /// spelled => expanded + /// ------------------------ + /// #define FIRST => {} + /// a FIRST(arg) => {a f1 arg = arg f2} + /// arg => {arg, arg} // arg #1 is before `=` and arg #2 is + /// // after `=` in the expanded tokens. + llvm::SmallVector, 1> + expandedForSpelled(llvm::ArrayRef Spelled) const; + /// An expansion produced by the preprocessor, includes macro expansions and /// preprocessor directives. Preprocessor always maps a non-empty range of /// spelled tokens to a (possibly empty) range of expanded tokens. Here is a @@ -317,6 +346,10 @@ std::pair spelledForExpandedToken(const syntax::Token *Expanded) const; + static const Mapping * + mappingStartingBeforeSpelled(const MarkedFile &F, + const syntax::Token *Spelled); + /// Token stream produced after preprocessing, conceputally this captures the /// same stream as 'clang -E' (excluding the preprocessor directives like /// #file, etc.). diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -213,19 +213,90 @@ // Our token could only be produced by the previous mapping. if (It == File.Mappings.begin()) { // No previous mapping, no need to modify offsets. - return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr}; + return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], + /*Mapping=*/nullptr}; } --It; // 'It' now points to last mapping that started before our token. // Check if the token is part of the mapping. if (ExpandedIndex < It->EndExpanded) - return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It}; + return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It}; // Not part of the mapping, use the index from previous mapping to compute the // corresponding spelled token. return { &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)], - /*Mapping*/ nullptr}; + /*Mapping=*/nullptr}; +} + +const TokenBuffer::Mapping * +TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F, + const syntax::Token *Spelled) { + assert(F.SpelledTokens.data() <= Spelled); + unsigned SpelledI = Spelled - F.SpelledTokens.data(); + assert(SpelledI < F.SpelledTokens.size()); + + auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) { + return M.BeginSpelled <= SpelledI; + }); + if (It == F.Mappings.begin()) + return nullptr; + --It; + return &*It; +} + +llvm::SmallVector, 1> +TokenBuffer::expandedForSpelled(llvm::ArrayRef Spelled) const { + if (Spelled.empty()) + return {}; + assert(Spelled.front().location().isFileID()); + + auto FID = sourceManager().getFileID(Spelled.front().location()); + auto It = Files.find(FID); + assert(It != Files.end()); + + const MarkedFile &File = It->second; + // assert that Spelled is a subarray of File.SpelledTokens. + assert(File.SpelledTokens.data() <= Spelled.data()); + assert(Spelled.size() <= File.SpelledTokens.size()); + + auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front()); + unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data(); + unsigned ExpandedBegin; + if (!FrontMapping) { + // Requested spelled tokens are mapped in the first mapping. + ExpandedBegin = File.BeginExpanded + SpelledFrontI; + } else if (SpelledFrontI < FrontMapping->EndSpelled) { + // Requested spelled tokens start in the FrontMapping. + if (SpelledFrontI != FrontMapping->BeginSpelled) + return {}; // FIXME: support macro arguments. + ExpandedBegin = FrontMapping->BeginExpanded; + } else { + ExpandedBegin = + FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled); + } + + auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back()); + unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data(); + unsigned ExpandedEnd; + if (!BackMapping) { + ExpandedEnd = File.BeginExpanded + SpelledBackI + 1; + } else if (SpelledBackI < BackMapping->EndSpelled) { + if (SpelledBackI + 1 != BackMapping->EndSpelled) + return {}; // FIXME: support macro arguments. + ExpandedEnd = BackMapping->EndExpanded; + } else { + ExpandedEnd = + BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1; + } + + assert(ExpandedBegin < ExpandedTokens.size()); + assert(ExpandedEnd < ExpandedTokens.size()); + // Avoid returning empty ranges. + if (ExpandedBegin == ExpandedEnd) + return {}; + return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin, + ExpandedTokens.data() + ExpandedEnd)}; } llvm::ArrayRef TokenBuffer::spelledTokens(FileID FID) const { diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -57,6 +57,7 @@ using ::testing::Contains; using ::testing::ElementsAre; using ::testing::Field; +using ::testing::IsEmpty; using ::testing::Matcher; using ::testing::Not; using ::testing::Pointee; @@ -185,10 +186,14 @@ template llvm::ArrayRef findSubrange(llvm::ArrayRef Subrange, llvm::ArrayRef Range, Eq F) { - for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) { + assert(Subrange.size() >= 1); + if (Range.size() < Subrange.size()) + return llvm::makeArrayRef(Range.end(), Range.end()); + for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size(); + Begin <= Last; ++Begin) { auto It = Begin; - for (auto ItSub = Subrange.begin(); - ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) { + for (auto ItSub = Subrange.begin(); ItSub != Subrange.end(); + ++ItSub, ++It) { if (!F(*ItSub, *It)) goto continue_outer; } @@ -889,4 +894,100 @@ ASSERT_EQ(Code.points().size(), 8u); } +TEST_F(TokenBufferTest, ExpandedBySpelled) { + recordTokens(R"cpp( + a1 a2 a3 b1 b2 + )cpp"); + // Sanity check: expanded and spelled tokens are stored separately. + EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); + // Searching for subranges of expanded tokens should give the corresponding + // spelled ones. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")), + ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")), + ElementsAre(SameRange(findExpanded("a1 a2 a3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")), + ElementsAre(SameRange(findExpanded("b1 b2")))); + + // Test search on simple macro expansions. + recordTokens(R"cpp( + #define A a1 a2 a3 + #define B b1 b2 + + A split B + )cpp"); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")), + ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()), + ElementsAre(SameRange(findExpanded("a1 a2 a3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()), + ElementsAre(SameRange(findExpanded("b1 b2")))); + + // Recursive macro invocations. + recordTokens(R"cpp( + #define ID(x) x + #define B b1 b2 + + ID(ID(ID(a1) a2 a3)) split ID(B) + )cpp"); + + EXPECT_THAT( + Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")), + ElementsAre(SameRange(findExpanded("a1 a2 a3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")), + ElementsAre(SameRange(findExpanded("b1 b2")))); + EXPECT_THAT(Buffer.expandedForSpelled( + findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")), + ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); + // FIXME: these should succeed, but we do not support macro arguments yet. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty()); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")), + IsEmpty()); + + // Empty macro expansions. + recordTokens(R"cpp( + #define EMPTY + #define ID(X) X + + EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 + EMPTY EMPTY ID(4 5 6) split2 + ID(7 8 9) EMPTY EMPTY + )cpp"); + // Covered by empty expansions on one of both of the sides. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")), + ElementsAre(SameRange(findExpanded("4 5 6")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")), + ElementsAre(SameRange(findExpanded("7 8 9")))); + // Including the empty macro expansions on the side. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + EXPECT_THAT( + Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + + // Empty mappings coming from various directives. + recordTokens(R"cpp( + #define ID(X) X + ID(1) + #pragma lalala + not_mapped + )cpp"); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")), + IsEmpty()); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")), + IsEmpty()); + + // Empty macro expansion. + recordTokens(R"cpp( + #define EMPTY + EMPTY int a = 100; + )cpp"); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()), + IsEmpty()); +} + } // namespace