diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -171,7 +171,6 @@ /// To build a token buffer use the TokenCollector class. You can also compute /// the spelled tokens of a file using the tokenize() helper. /// -/// FIXME: allow to map from spelled to expanded tokens when use-case shows up. /// FIXME: allow mappings into macro arguments. class TokenBuffer { public: @@ -228,6 +227,36 @@ llvm::Optional> spelledForExpanded(llvm::ArrayRef Expanded) const; + /// Find the subranges of expanded tokens, corresponding to \p Spelled. + /// + /// Some spelled tokens may not be present in the expanded token stream, so + /// this function can return an empty vector, e.g. for tokens of macro + /// directives or disabled preprocessor branches. + /// + /// Some spelled tokens can be duplicated in the expanded token stream + /// multiple times and this function will return multiple results in those + /// cases. This happens when \p Spelled is inside a macro argument. + /// + /// FIXME: return correct results on macro arguments. For now, we return an + /// empty list. + /// + /// (!) will return empty vector on tokens from #define body: + /// E.g. for the following example: + /// + /// #define FIRST(A) f1 A = A f2 + /// #define SECOND s + /// + /// a FIRST(arg) b SECOND c // expanded tokens are: a f1 arg = arg f2 b s + /// The results would be + /// spelled => expanded + /// ------------------------ + /// #define FIRST => {} + /// a FIRST(arg) => {a f1 arg = arg f2} + /// arg => {arg, arg} // arg #1 is before `=` and arg #2 is + /// // after `=` in the expanded tokens. + llvm::SmallVector, 1> + expandedForSpelled(llvm::ArrayRef Spelled) const; + /// An expansion produced by the preprocessor, includes macro expansions and /// preprocessor directives. Preprocessor always maps a non-empty range of /// spelled tokens to a (possibly empty) range of expanded tokens. Here is a @@ -317,6 +346,12 @@ std::pair spelledForExpandedToken(const syntax::Token *Expanded) const; + /// Returns a mapping starting before \p Spelled token, or nullptr if no + /// such mapping exists. + static const Mapping * + mappingStartingBeforeSpelled(const MarkedFile &F, + const syntax::Token *Spelled); + /// Token stream produced after preprocessing, conceputally this captures the /// same stream as 'clang -E' (excluding the preprocessor directives like /// #file, etc.). diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -213,19 +213,108 @@ // Our token could only be produced by the previous mapping. if (It == File.Mappings.begin()) { // No previous mapping, no need to modify offsets. - return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr}; + return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], + /*Mapping=*/nullptr}; } --It; // 'It' now points to last mapping that started before our token. // Check if the token is part of the mapping. if (ExpandedIndex < It->EndExpanded) - return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It}; + return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It}; // Not part of the mapping, use the index from previous mapping to compute the // corresponding spelled token. return { &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)], - /*Mapping*/ nullptr}; + /*Mapping=*/nullptr}; +} + +const TokenBuffer::Mapping * +TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F, + const syntax::Token *Spelled) { + assert(F.SpelledTokens.data() <= Spelled); + unsigned SpelledI = Spelled - F.SpelledTokens.data(); + assert(SpelledI < F.SpelledTokens.size()); + + auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) { + return M.BeginSpelled <= SpelledI; + }); + if (It == F.Mappings.begin()) + return nullptr; + --It; + return &*It; +} + +llvm::SmallVector, 1> +TokenBuffer::expandedForSpelled(llvm::ArrayRef Spelled) const { + if (Spelled.empty()) + return {}; + assert(Spelled.front().location().isFileID()); + + auto FID = sourceManager().getFileID(Spelled.front().location()); + auto It = Files.find(FID); + assert(It != Files.end()); + + const MarkedFile &File = It->second; + // `Spelled` must be a subrange of `File.SpelledTokens`. + assert(File.SpelledTokens.data() <= Spelled.data()); + assert(Spelled.size() <= File.SpelledTokens.size()); +#ifndef NDEBUG + auto T1 = Spelled.back().location(); + auto T2 = File.SpelledTokens.back().location(); + assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2)); +#endif + + auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front()); + unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data(); + assert(SpelledFrontI < File.SpelledTokens.size()); + unsigned ExpandedBegin; + if (!FrontMapping) { + // No mapping that starts before the first token of Spelled, we don't have + // to modify offsets. + ExpandedBegin = File.BeginExpanded + SpelledFrontI; + } else if (SpelledFrontI < FrontMapping->EndSpelled) { + // This mapping applies to Spelled tokens. + if (SpelledFrontI != FrontMapping->BeginSpelled) { + // Spelled tokens don't cover the entire mapping, returning empty result. + return {}; // FIXME: support macro arguments. + } + // Spelled tokens start at the beginning of this mapping. + ExpandedBegin = FrontMapping->BeginExpanded; + } else { + // Spelled tokens start after the mapping ends (they start in the hole + // between 2 mappings, or between a mapping and end of the file). + ExpandedBegin = + FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled); + } + + auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back()); + unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data(); + unsigned ExpandedEnd; + if (!BackMapping) { + // No mapping that starts before the last token of Spelled, we don't have to + // modify offsets. + ExpandedEnd = File.BeginExpanded + SpelledBackI + 1; + } else if (SpelledBackI < BackMapping->EndSpelled) { + // This mapping applies to Spelled tokens. + if (SpelledBackI + 1 != BackMapping->EndSpelled) { + // Spelled tokens don't cover the entire mapping, returning empty result. + return {}; // FIXME: support macro arguments. + } + ExpandedEnd = BackMapping->EndExpanded; + } else { + // Spelled tokens end after the mapping ends. + ExpandedEnd = + BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1; + } + + assert(ExpandedBegin < ExpandedTokens.size()); + assert(ExpandedEnd < ExpandedTokens.size()); + // Avoid returning empty ranges. + if (ExpandedBegin == ExpandedEnd) + return {}; + return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin, + ExpandedTokens.data() + ExpandedEnd)}; } llvm::ArrayRef TokenBuffer::spelledTokens(FileID FID) const { @@ -529,6 +618,20 @@ // mapping for those as they did not have expanded counterparts. fillGapsAtEndOfFiles(); +#ifndef NDEBUG + for (auto &pair : Result.Files) { + auto &mappings = pair.second.Mappings; + assert(std::is_sorted( + mappings.begin(), mappings.end(), + [](const TokenBuffer::Mapping &M1, const TokenBuffer::Mapping &M2) { + return M1.BeginSpelled < M2.BeginSpelled && + M1.EndSpelled < M2.EndSpelled && + M1.BeginExpanded < M2.BeginExpanded && + M1.EndExpanded < M2.EndExpanded; + })); + } +#endif + return std::move(Result); } diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -57,6 +57,7 @@ using ::testing::Contains; using ::testing::ElementsAre; using ::testing::Field; +using ::testing::IsEmpty; using ::testing::Matcher; using ::testing::Not; using ::testing::Pointee; @@ -185,10 +186,14 @@ template llvm::ArrayRef findSubrange(llvm::ArrayRef Subrange, llvm::ArrayRef Range, Eq F) { - for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) { + assert(Subrange.size() >= 1); + if (Range.size() < Subrange.size()) + return llvm::makeArrayRef(Range.end(), Range.end()); + for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size(); + Begin <= Last; ++Begin) { auto It = Begin; - for (auto ItSub = Subrange.begin(); - ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) { + for (auto ItSub = Subrange.begin(); ItSub != Subrange.end(); + ++ItSub, ++It) { if (!F(*ItSub, *It)) goto continue_outer; } @@ -889,4 +894,111 @@ ASSERT_EQ(Code.points().size(), 8u); } +TEST_F(TokenBufferTest, ExpandedBySpelled) { + recordTokens(R"cpp( + a1 a2 a3 b1 b2 + )cpp"); + // Sanity check: expanded and spelled tokens are stored separately. + EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2")))); + // Searching for subranges of expanded tokens should give the corresponding + // spelled ones. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")), + ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")), + ElementsAre(SameRange(findExpanded("a1 a2 a3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")), + ElementsAre(SameRange(findExpanded("b1 b2")))); + + // Test search on simple macro expansions. + recordTokens(R"cpp( + #define A a1 a2 a3 + #define B b1 b2 + + A split B + )cpp"); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")), + ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()), + ElementsAre(SameRange(findExpanded("a1 a2 a3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()), + ElementsAre(SameRange(findExpanded("b1 b2")))); + + // Ranges not fully covering macro expansions should fail. + recordTokens(R"cpp( + #define ID(x) x + + ID(a) + )cpp"); + // Spelled don't cover entire mapping (missing ID token) -> empty result + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty()); + // Spelled don't cover entire mapping (missing ) token) -> empty result + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty()); + + // Recursive macro invocations. + recordTokens(R"cpp( + #define ID(x) x + #define B b1 b2 + + ID(ID(ID(a1) a2 a3)) split ID(B) + )cpp"); + + EXPECT_THAT( + Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")), + ElementsAre(SameRange(findExpanded("a1 a2 a3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")), + ElementsAre(SameRange(findExpanded("b1 b2")))); + EXPECT_THAT(Buffer.expandedForSpelled( + findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")), + ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2")))); + // FIXME: these should succeed, but we do not support macro arguments yet. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty()); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")), + IsEmpty()); + + // Empty macro expansions. + recordTokens(R"cpp( + #define EMPTY + #define ID(X) X + + EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 + EMPTY EMPTY ID(4 5 6) split2 + ID(7 8 9) EMPTY EMPTY + )cpp"); + // Covered by empty expansions on one of both of the sides. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")), + ElementsAre(SameRange(findExpanded("4 5 6")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")), + ElementsAre(SameRange(findExpanded("7 8 9")))); + // Including the empty macro expansions on the side. + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + EXPECT_THAT( + Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")), + ElementsAre(SameRange(findExpanded("1 2 3")))); + + // Empty mappings coming from various directives. + recordTokens(R"cpp( + #define ID(X) X + ID(1) + #pragma lalala + not_mapped + )cpp"); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")), + IsEmpty()); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")), + IsEmpty()); + + // Empty macro expansion. + recordTokens(R"cpp( + #define EMPTY + EMPTY int a = 100; + )cpp"); + EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()), + IsEmpty()); +} + } // namespace diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -1215,6 +1215,9 @@ | `-} `-} )txt"); +} + +TEST_F(SyntaxTreeTest, ModifiableNodes) { // All nodes can be mutated. expectTreeDumpEqual( R"cpp(