[clangd] Use syntax tokens of the preamble in ReplayPreamble

ReplayPreamble::attach() now takes the PreambleBounds of the main file. The
constructor tokenizes only the first PB.Size bytes of the main file once with
syntax::tokenize() and keeps the result in MainFileTokens. When replaying an
include, the '#' token is located in MainFileTokens with a binary search on
Inc.HashOffset, and the directive and file name tokens passed to the delegate
are synthesized from the following tokens and from Inc.Written, replacing the
per-include raw Lexer. The LangOpts member is no longer needed and is removed.

The unit test registers a clang-tidy check whose PPCallbacks record the
InclusionDirective and FileSkipped events, and compares the recorded offsets,
ranges and token kinds against annotations in the test code.

diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -114,16 +114,16 @@
   // Attach preprocessor hooks such that preamble events will be injected at
   // the appropriate time.
   // Events will be delivered to the *currently registered* PP callbacks.
-  static void attach(const IncludeStructure &Includes,
-                     CompilerInstance &Clang) {
+  static void attach(const IncludeStructure &Includes, CompilerInstance &Clang,
+                     const PreambleBounds &PB) {
     auto &PP = Clang.getPreprocessor();
     auto *ExistingCallbacks = PP.getPPCallbacks();
     // No need to replay events if nobody is listening.
     if (!ExistingCallbacks)
       return;
-    PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(
-        new ReplayPreamble(Includes, ExistingCallbacks,
-                           Clang.getSourceManager(), PP, Clang.getLangOpts())));
+    PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(new ReplayPreamble(
+        Includes, ExistingCallbacks, Clang.getSourceManager(), PP,
+        Clang.getLangOpts(), PB)));
     // We're relying on the fact that addPPCallbacks keeps the old PPCallbacks
     // around, creating a chaining wrapper. Guard against other implementations.
     assert(PP.getPPCallbacks() != ExistingCallbacks &&
@@ -133,9 +133,13 @@
 private:
   ReplayPreamble(const IncludeStructure &Includes, PPCallbacks *Delegate,
                  const SourceManager &SM, Preprocessor &PP,
-                 const LangOptions &LangOpts)
-      : Includes(Includes), Delegate(Delegate), SM(SM), PP(PP),
-        LangOpts(LangOpts) {}
+                 const LangOptions &LangOpts, const PreambleBounds &PB)
+      : Includes(Includes), Delegate(Delegate), SM(SM), PP(PP) {
+    // Only tokenize the preamble section of the main file, as we are not
+    // interested in the rest of the tokens.
+    MainFileTokens = syntax::tokenize(
+        syntax::FileRange(SM.getMainFileID(), 0, PB.Size), SM, LangOpts);
+  }
 
   // In a normal compile, the preamble traverses the following structure:
   //
@@ -167,33 +171,53 @@
         if (auto FE = SM.getFileManager().getFile(Inc.Resolved))
           File = *FE;
 
+      // Re-lex the #include directive to find its interesting parts.
+      auto HashLoc = SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
+      auto HashTok = llvm::partition_point(MainFileTokens,
+                                           [&HashLoc](const syntax::Token &T) {
+                                             return T.location() < HashLoc;
+                                           });
+      assert(HashTok != MainFileTokens.end() && HashTok->kind() == tok::hash);
+
+      auto IncludeTok = std::next(HashTok);
+      assert(IncludeTok != MainFileTokens.end());
+
+      auto FileTok = std::next(IncludeTok);
+      assert(FileTok != MainFileTokens.end());
+
+      // Create a fake import/include token, none of the callers seem to care
+      // about clang::Token::Flags.
+      Token SynthesizedIncludeTok;
+      SynthesizedIncludeTok.startToken();
+      SynthesizedIncludeTok.setLocation(IncludeTok->location());
+      SynthesizedIncludeTok.setLength(IncludeTok->length());
+      SynthesizedIncludeTok.setKind(tok::raw_identifier);
+      SynthesizedIncludeTok.setRawIdentifierData(IncludeTok->text(SM).data());
+      PP.LookUpIdentifierInfo(SynthesizedIncludeTok);
+
+      // Same here, create a fake one for Filename, including angles or quotes.
+      Token SynthesizedFilenameTok;
+      SynthesizedFilenameTok.startToken();
+      SynthesizedFilenameTok.setLocation(FileTok->location());
+      // Note that we can't make use of FileTok->length/text in here as in the
+      // case of angled includes this will contain tok::less instead of
+      // filename. Whereas Inc.Written contains the full header name including
+      // quotes/angles.
+      SynthesizedFilenameTok.setLength(Inc.Written.length());
+      SynthesizedFilenameTok.setKind(tok::header_name);
+      SynthesizedFilenameTok.setLiteralData(Inc.Written.data());
+
       llvm::StringRef WrittenFilename =
           llvm::StringRef(Inc.Written).drop_front().drop_back();
-      bool Angled = llvm::StringRef(Inc.Written).startswith("<");
-
-      // Re-lex the #include directive to find its interesting parts.
-      llvm::StringRef Src = SM.getBufferData(SM.getMainFileID());
-      Lexer RawLexer(SM.getLocForStartOfFile(SM.getMainFileID()), LangOpts,
-                     Src.begin(), Src.begin() + Inc.HashOffset, Src.end());
-      Token HashTok, IncludeTok, FilenameTok;
-      RawLexer.LexFromRawLexer(HashTok);
-      assert(HashTok.getKind() == tok::hash);
-      RawLexer.setParsingPreprocessorDirective(true);
-      RawLexer.LexFromRawLexer(IncludeTok);
-      IdentifierInfo *II = PP.getIdentifierInfo(IncludeTok.getRawIdentifier());
-      IncludeTok.setIdentifierInfo(II);
-      IncludeTok.setKind(II->getTokenID());
-      RawLexer.LexIncludeFilename(FilenameTok);
-
-      Delegate->InclusionDirective(
-          HashTok.getLocation(), IncludeTok, WrittenFilename, Angled,
-          CharSourceRange::getCharRange(FilenameTok.getLocation(),
-                                        FilenameTok.getEndLoc()),
-          File, "SearchPath", "RelPath", /*Imported=*/nullptr, Inc.FileKind);
+      Delegate->InclusionDirective(HashTok->location(), SynthesizedIncludeTok,
+                                   WrittenFilename, Inc.Written.front() == '<',
+                                   FileTok->range(SM).toCharRange(SM), File,
+                                   "SearchPath", "RelPath",
+                                   /*Imported=*/nullptr, Inc.FileKind);
       if (File)
         // FIXME: Use correctly named FileEntryRef.
-        Delegate->FileSkipped(FileEntryRef(File->getName(), *File), FilenameTok,
-                              Inc.FileKind);
+        Delegate->FileSkipped(FileEntryRef(File->getName(), *File),
+                              SynthesizedFilenameTok, Inc.FileKind);
       else {
         llvm::SmallString<1> UnusedRecovery;
         Delegate->FileNotFound(WrittenFilename, UnusedRecovery);
@@ -205,7 +229,7 @@
   PPCallbacks *Delegate;
   const SourceManager &SM;
   Preprocessor &PP;
-  const LangOptions &LangOpts;
+  std::vector<syntax::Token> MainFileTokens;
 };
 
 } // namespace
@@ -337,7 +361,7 @@
   auto Includes = Preamble ? Preamble->Includes : IncludeStructure{};
   // Replay the preamble includes so that clang-tidy checks can see them.
   if (Preamble)
-    ReplayPreamble::attach(Includes, *Clang);
+    ReplayPreamble::attach(Includes, *Clang, Preamble->Preamble.getBounds());
   // Important: collectIncludeStructure is registered *after* ReplayPreamble!
   // Otherwise we would collect the replayed includes again...
   // (We can't *just* use the replayed includes, they don't have Resolved path).
diff --git a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
--- a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
@@ -11,6 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../../clang-tidy/ClangTidyModule.h"
+#include "../../clang-tidy/ClangTidyModuleRegistry.h"
 #include "AST.h"
 #include "Annotations.h"
 #include "Compiler.h"
@@ -20,8 +22,13 @@
 #include "TestFS.h"
 #include "TestTU.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Token.h"
 #include "clang/Tooling/Syntax/Tokens.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock-matchers.h"
 #include "gmock/gmock.h"
@@ -71,6 +78,10 @@
   return false;
 }
 
+MATCHER_P(RangeIs, R, "") {
+  return arg.beginOffset() == R.Begin && arg.endOffset() == R.End;
+}
+
 TEST(ParsedASTTest, TopLevelDecls) {
   TestTU TU;
   TU.HeaderCode = R"(
@@ -296,6 +307,116 @@
               testing::UnorderedElementsAreArray(TestCase.points()));
 }
 
+TEST(ParsedASTTest, ReplayPreambleForTidyCheckers) {
+  struct Inclusion {
+    Inclusion(const SourceManager &SM, SourceLocation HashLoc,
+              const Token &IncludeTok, llvm::StringRef FileName, bool IsAngled,
+              CharSourceRange FilenameRange)
+        : HashOffset(SM.getDecomposedLoc(HashLoc).second), IncTok(IncludeTok),
+          IncDirective(IncludeTok.getIdentifierInfo()->getName()),
+          FileNameOffset(SM.getDecomposedLoc(FilenameRange.getBegin()).second),
+          FileName(FileName), IsAngled(IsAngled) {}
+    size_t HashOffset;
+    syntax::Token IncTok;
+    llvm::StringRef IncDirective;
+    size_t FileNameOffset;
+    llvm::StringRef FileName;
+    bool IsAngled;
+  };
+  static std::vector<Inclusion> Includes;
+  static std::vector<syntax::Token> SkippedFiles;
+  struct ReplayPreamblePPCallback : public PPCallbacks {
+    const SourceManager &SM;
+    explicit ReplayPreamblePPCallback(const SourceManager &SM) : SM(SM) {}
+
+    void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
+                            StringRef FileName, bool IsAngled,
+                            CharSourceRange FilenameRange, const FileEntry *,
+                            StringRef, StringRef, const Module *,
+                            SrcMgr::CharacteristicKind) override {
+      Includes.emplace_back(SM, HashLoc, IncludeTok, FileName, IsAngled,
+                            FilenameRange);
+    }
+
+    void FileSkipped(const FileEntryRef &, const Token &FilenameTok,
+                     SrcMgr::CharacteristicKind) override {
+      SkippedFiles.emplace_back(FilenameTok);
+    }
+  };
+  struct ReplayPreambleCheck : public tidy::ClangTidyCheck {
+    ReplayPreambleCheck(StringRef Name, tidy::ClangTidyContext *Context)
+        : ClangTidyCheck(Name, Context) {}
+    void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP,
+                             Preprocessor *ModuleExpanderPP) override {
+      PP->addPPCallbacks(::std::make_unique<ReplayPreamblePPCallback>(SM));
+    }
+  };
+  struct ReplayPreambleModule : public tidy::ClangTidyModule {
+    void
+    addCheckFactories(tidy::ClangTidyCheckFactories &CheckFactories) override {
+      CheckFactories.registerCheck<ReplayPreambleCheck>(
+          "replay-preamble-check");
+    }
+  };
+
+  static tidy::ClangTidyModuleRegistry::Add<ReplayPreambleModule> X(
+      "replay-preamble-module", "");
+  TestTU TU;
+  // This check records inclusion directives replayed by clangd.
+  TU.ClangTidyChecks = "replay-preamble-check";
+  llvm::Annotations Test(R"cpp(
+    $hash^#$include[[import]] $filebegin^"$filerange[[bar.h]]"
+    $hash^#$include[[include_next]] $filebegin^"$filerange[[baz.h]]"
+    $hash^#$include[[include]] $filebegin^<$filerange[[a.h]]>)cpp");
+  llvm::StringRef Code = Test.code();
+  TU.Code = Code.str();
+  TU.AdditionalFiles["bar.h"] = "";
+  TU.AdditionalFiles["baz.h"] = "";
+  TU.AdditionalFiles["a.h"] = "";
+  TU.ExtraArgs = {"-isystem."};
+
+  const auto &AST = TU.build();
+  const auto &SM = AST.getSourceManager();
+
+  auto HashLocs = Test.points("hash");
+  ASSERT_EQ(HashLocs.size(), Includes.size());
+  auto IncludeRanges = Test.ranges("include");
+  ASSERT_EQ(IncludeRanges.size(), Includes.size());
+  auto FileBeginLocs = Test.points("filebegin");
+  ASSERT_EQ(FileBeginLocs.size(), Includes.size());
+  auto FileRanges = Test.ranges("filerange");
+  ASSERT_EQ(FileRanges.size(), Includes.size());
+
+  ASSERT_EQ(SkippedFiles.size(), Includes.size());
+  for (size_t I = 0; I < Includes.size(); ++I) {
+    const auto &Inc = Includes[I];
+
+    EXPECT_EQ(Inc.HashOffset, HashLocs[I]);
+
+    auto IncRange = IncludeRanges[I];
+    EXPECT_THAT(Inc.IncTok.range(SM), RangeIs(IncRange));
+    EXPECT_EQ(Inc.IncTok.kind(), tok::identifier);
+    EXPECT_EQ(Inc.IncDirective,
+              Code.substr(IncRange.Begin, IncRange.End - IncRange.Begin));
+
+    EXPECT_EQ(Inc.FileNameOffset, FileBeginLocs[I]);
+    EXPECT_EQ(Inc.IsAngled, Code[FileBeginLocs[I]] == '<');
+
+    auto FileRange = FileRanges[I];
+    EXPECT_EQ(Inc.FileName,
+              Code.substr(FileRange.Begin, FileRange.End - FileRange.Begin));
+
+    EXPECT_EQ(SM.getDecomposedLoc(SkippedFiles[I].location()).second,
+              Inc.FileNameOffset);
+    // This also contains quotes/angles so increment the range by one from both
+    // sides.
+    EXPECT_EQ(
+        SkippedFiles[I].text(SM),
+        Code.substr(FileRange.Begin - 1, FileRange.End - FileRange.Begin + 2));
+    EXPECT_EQ(SkippedFiles[I].kind(), tok::header_name);
+  }
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang