diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -114,16 +114,16 @@
   // Attach preprocessor hooks such that preamble events will be injected at
   // the appropriate time.
   // Events will be delivered to the *currently registered* PP callbacks.
-  static void attach(const IncludeStructure &Includes,
-                     CompilerInstance &Clang) {
+  static void attach(const IncludeStructure &Includes, CompilerInstance &Clang,
+                     const PreambleBounds &PB) {
     auto &PP = Clang.getPreprocessor();
     auto *ExistingCallbacks = PP.getPPCallbacks();
     // No need to replay events if nobody is listening.
     if (!ExistingCallbacks)
       return;
-    PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(
-        new ReplayPreamble(Includes, ExistingCallbacks,
-                           Clang.getSourceManager(), PP, Clang.getLangOpts())));
+    PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(new ReplayPreamble(
+        Includes, ExistingCallbacks, Clang.getSourceManager(), PP,
+        Clang.getLangOpts(), PB)));
     // We're relying on the fact that addPPCallbacks keeps the old PPCallbacks
     // around, creating a chaining wrapper. Guard against other implementations.
     assert(PP.getPPCallbacks() != ExistingCallbacks &&
@@ -133,9 +133,13 @@
 private:
   ReplayPreamble(const IncludeStructure &Includes, PPCallbacks *Delegate,
                  const SourceManager &SM, Preprocessor &PP,
-                 const LangOptions &LangOpts)
-      : Includes(Includes), Delegate(Delegate), SM(SM), PP(PP),
-        LangOpts(LangOpts) {}
+                 const LangOptions &LangOpts, const PreambleBounds &PB)
+      : Includes(Includes), Delegate(Delegate), SM(SM), PP(PP) {
+    // Only tokenize the preamble section of the main file, as we are not
+    // interested in the rest of the tokens.
+    MainFileTokens = syntax::tokenize(
+        syntax::FileRange(SM.getMainFileID(), 0, PB.Size), SM, LangOpts);
+  }
 
   // In a normal compile, the preamble traverses the following structure:
   //
@@ -167,29 +171,49 @@
         if (auto FE = SM.getFileManager().getFile(Inc.Resolved))
           File = *FE;
 
+      // Re-lex the #include directive to find its interesting parts.
+      auto HashLoc = SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
+      auto HashTok = llvm::partition_point(MainFileTokens,
+                                           [&HashLoc](const syntax::Token &T) {
+                                             return T.location() < HashLoc;
+                                           });
+      assert(HashTok != MainFileTokens.end() && HashTok->kind() == tok::hash);
+
+      auto IncTok = std::next(HashTok);
+      assert(IncTok != MainFileTokens.end());
+
+      auto FileTok = std::next(IncTok);
+      assert(FileTok != MainFileTokens.end());
+
+      // Create a fake import/include token, none of the callers seem to care
+      // about clang::Token::Flags.
+      Token IncludeTok;
+      IncludeTok.startToken();
+      IncludeTok.setLocation(IncTok->location());
+      IncludeTok.setLength(IncTok->length());
+      IncludeTok.setKind(tok::raw_identifier);
+      IncludeTok.setRawIdentifierData(IncTok->text(SM).data());
+      PP.LookUpIdentifierInfo(IncludeTok);
+
+      // Same here, create a fake one for Filename, including angles or quotes.
+      Token FilenameTok;
+      FilenameTok.startToken();
+      FilenameTok.setLocation(FileTok->location());
+      // Note that we can't make use of FileTok->length/text in here as in the
+      // case of angled includes this will contain tok::less instead of
+      // filename. Whereas Inc.Written contains the full header name including
+      // quotes/angles.
+      FilenameTok.setLength(Inc.Written.length());
+      FilenameTok.setKind(tok::header_name);
+      FilenameTok.setLiteralData(Inc.Written.data());
+
       llvm::StringRef WrittenFilename =
           llvm::StringRef(Inc.Written).drop_front().drop_back();
-      bool Angled = llvm::StringRef(Inc.Written).startswith("<");
-
-      // Re-lex the #include directive to find its interesting parts.
-      llvm::StringRef Src = SM.getBufferData(SM.getMainFileID());
-      Lexer RawLexer(SM.getLocForStartOfFile(SM.getMainFileID()), LangOpts,
-                     Src.begin(), Src.begin() + Inc.HashOffset, Src.end());
-      Token HashTok, IncludeTok, FilenameTok;
-      RawLexer.LexFromRawLexer(HashTok);
-      assert(HashTok.getKind() == tok::hash);
-      RawLexer.setParsingPreprocessorDirective(true);
-      RawLexer.LexFromRawLexer(IncludeTok);
-      IdentifierInfo *II = PP.getIdentifierInfo(IncludeTok.getRawIdentifier());
-      IncludeTok.setIdentifierInfo(II);
-      IncludeTok.setKind(II->getTokenID());
-      RawLexer.LexIncludeFilename(FilenameTok);
-
-      Delegate->InclusionDirective(
-          HashTok.getLocation(), IncludeTok, WrittenFilename, Angled,
-          CharSourceRange::getCharRange(FilenameTok.getLocation(),
-                                        FilenameTok.getEndLoc()),
-          File, "SearchPath", "RelPath", /*Imported=*/nullptr, Inc.FileKind);
+      Delegate->InclusionDirective(HashTok->location(), IncludeTok,
+                                   WrittenFilename, Inc.Written.front() == '<',
+                                   FileTok->range(SM).toCharRange(SM), File,
+                                   "SearchPath", "RelPath",
+                                   /*Imported=*/nullptr, Inc.FileKind);
       if (File)
         // FIXME: Use correctly named FileEntryRef.
         Delegate->FileSkipped(FileEntryRef(File->getName(), *File), FilenameTok,
@@ -205,7 +229,7 @@
   PPCallbacks *Delegate;
   const SourceManager &SM;
   Preprocessor &PP;
-  const LangOptions &LangOpts;
+  std::vector<syntax::Token> MainFileTokens;
 };
 
 } // namespace
@@ -335,7 +359,7 @@
   auto Includes = Preamble ? Preamble->Includes : IncludeStructure{};
   // Replay the preamble includes so that clang-tidy checks can see them.
   if (Preamble)
-    ReplayPreamble::attach(Includes, *Clang);
+    ReplayPreamble::attach(Includes, *Clang, Preamble->Preamble.getBounds());
   // Important: collectIncludeStructure is registered *after* ReplayPreamble!
   // Otherwise we would collect the replayed includes again...
   // (We can't *just* use the replayed includes, they don't have Resolved path).
diff --git a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
--- a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
@@ -11,6 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../../clang-tidy/ClangTidyModule.h"
+#include "../../clang-tidy/ClangTidyModuleRegistry.h"
 #include "AST.h"
 #include "Annotations.h"
 #include "Compiler.h"
@@ -20,8 +22,13 @@
 #include "TestFS.h"
 #include "TestTU.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Token.h"
 #include "clang/Tooling/Syntax/Tokens.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock-matchers.h"
 #include "gmock/gmock.h"
@@ -296,6 +303,112 @@
               testing::UnorderedElementsAreArray(TestCase.points()));
 }
 
+TEST(ParsedASTTest, ReplayPreambleForTidyCheckers) {
+  struct Inclusion {
+    Inclusion(const SourceManager &SM, SourceLocation HashLoc,
+              const Token &IncludeTok, llvm::StringRef FileName, bool IsAngled,
+              CharSourceRange FilenameRange)
+        : HashOffset(SM.getDecomposedLoc(HashLoc).second), IncTok(IncludeTok),
+          IncDirective(IncludeTok.getIdentifierInfo()->getName()),
+          FileNameOffset(SM.getDecomposedLoc(FilenameRange.getBegin()).second),
+          FileName(FileName), IsAngled(IsAngled) {}
+    size_t HashOffset;
+    syntax::Token IncTok;
+    llvm::StringRef IncDirective;
+    size_t FileNameOffset;
+    llvm::StringRef FileName;
+    bool IsAngled;
+  };
+  static std::vector<Inclusion> Includes;
+  static std::vector<syntax::Token> SkippedFiles;
+  struct ReplayPreamblePPCallback : public PPCallbacks {
+    const SourceManager &SM;
+    explicit ReplayPreamblePPCallback(const SourceManager &SM) : SM(SM) {}
+
+    void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
+                            StringRef FileName, bool IsAngled,
+                            CharSourceRange FilenameRange, const FileEntry *,
+                            StringRef, StringRef, const Module *,
+                            SrcMgr::CharacteristicKind) override {
+      Includes.emplace_back(SM, HashLoc, IncludeTok, FileName, IsAngled,
+                            FilenameRange);
+    }
+
+    void FileSkipped(const FileEntryRef &, const Token &FilenameTok,
+                     SrcMgr::CharacteristicKind) override {
+      SkippedFiles.emplace_back(FilenameTok);
+    }
+  };
+  struct ReplayPreambleCheck : public tidy::ClangTidyCheck {
+    ReplayPreambleCheck(StringRef Name, tidy::ClangTidyContext *Context)
+        : ClangTidyCheck(Name, Context) {}
+    void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP,
+                             Preprocessor *ModuleExpanderPP) override {
+      PP->addPPCallbacks(::std::make_unique<ReplayPreamblePPCallback>(SM));
+    }
+  };
+  struct ReplayPreambleModule : public tidy::ClangTidyModule {
+    void
+    addCheckFactories(tidy::ClangTidyCheckFactories &CheckFactories) override {
+      CheckFactories.registerCheck<ReplayPreambleCheck>(
+          "replay-preamble-check");
+    }
+  };
+
+  static tidy::ClangTidyModuleRegistry::Add<ReplayPreambleModule> X(
+      "replay-preamble-module", "");
+  TestTU TU;
+  // this check runs the preprocessor, we need to make sure it does not break
+  // our recording logic.
+  TU.ClangTidyChecks = "replay-preamble-check";
+  llvm::Annotations Test(R"cpp(
+    ^#[[import]] ^"[[bar.h]]"
+    ^#[[include_next]] ^"[[baz.h]]"
+    ^#[[include]] ^<[[a.h]]>)cpp");
+  llvm::StringRef Code = Test.code();
+  TU.Code = Code.str();
+  TU.AdditionalFiles["bar.h"] = "";
+  TU.AdditionalFiles["baz.h"] = "";
+  TU.AdditionalFiles["a.h"] = "";
+  TU.ExtraArgs = {"-isystem."};
+
+  const auto &AST = TU.build();
+  const auto &SM = AST.getSourceManager();
+
+  auto Ranges = Test.ranges();
+  auto Points = Test.points();
+  ASSERT_EQ(Ranges.size() / 2, Includes.size());
+  ASSERT_EQ(SkippedFiles.size(), Includes.size());
+  for (size_t I = 0; I < Includes.size(); ++I) {
+    const auto &Inc = Includes[I];
+
+    auto &P = Points[2 * I];
+    EXPECT_EQ(Inc.HashOffset, P);
+
+    auto R = Ranges[2 * I];
+    const auto &IncRange = Inc.IncTok.range(SM);
+    EXPECT_EQ(IncRange.beginOffset(), R.Begin);
+    EXPECT_EQ(IncRange.endOffset(), R.End);
+    EXPECT_EQ(Inc.IncTok.kind(), tok::identifier);
+    EXPECT_EQ(Inc.IncDirective, Code.substr(R.Begin, R.End - R.Begin));
+
+    P = Points[2 * I + 1];
+    EXPECT_EQ(Inc.FileNameOffset, P);
+    EXPECT_EQ(Inc.IsAngled, Code[P] == '<');
+
+    R = Ranges[2 * I + 1];
+    EXPECT_EQ(Inc.FileName, Code.substr(R.Begin, R.End - R.Begin));
+
+    EXPECT_EQ(SM.getDecomposedLoc(SkippedFiles[I].location()).second,
+              Inc.FileNameOffset);
+    // This also contains quotes/angles so increment the range by one from both
+    // sides.
+    EXPECT_EQ(SkippedFiles[I].text(SM),
+              Code.substr(R.Begin - 1, R.End - R.Begin + 2));
+    EXPECT_EQ(SkippedFiles[I].kind(), tok::header_name);
+  }
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang