diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h @@ -17,6 +17,10 @@ #ifndef CLANG_INCLUDE_CLEANER_RECORD_H #define CLANG_INCLUDE_CLEANER_RECORD_H +#include "clang-include-cleaner/Types.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" #include #include @@ -24,11 +28,14 @@ class ASTConsumer; class ASTContext; class Decl; +class FileEntry; +class Preprocessor; +class PPCallbacks; namespace include_cleaner { -// Contains recorded parser events relevant to include-cleaner. +// Recorded main-file parser events relevant to include-cleaner. struct RecordedAST { - // The consumer (when installed into clang) tracks declarations in this. + // The consumer (when installed into clang) tracks declarations in `*this`. std::unique_ptr record(); ASTContext *Ctx = nullptr; @@ -39,6 +46,42 @@ std::vector Roots; }; +// Recorded main-file preprocessor events relevant to include-cleaner. +// +// This doesn't include facts that we record globally for the whole TU, even +// when they occur in the main file (e.g. IWYU pragmas). +struct RecordedPP { + // The callback (when installed into clang) tracks macros/includes in this. + std::unique_ptr record(const Preprocessor &PP); + + // Describes where macros were used in the main file. + std::vector MacroReferences; + + // A container for all includes present in the main file. + // Supports efficiently hit-testing Headers against Includes. + // FIXME: is there a more natural header for this class? + class RecordedIncludes { + public: + void add(const Include &); + + // All #includes seen, in the order they appear. + llvm::ArrayRef all() const { return All; } + + // Determine #includes that match a header (that provides a used symbol). + // + // Matching is based on the type of Header specified: + // - for a physical file like /path/to/foo.h, we check Resolved + // - for a logical file like , we check Spelled + llvm::SmallVector match(Header H) const; + + private: + std::vector All; + // Lookup structures for match(), values are index into All. + llvm::StringMap> BySpelling; + llvm::DenseMap> ByFile; + } Includes; +}; + } // namespace include_cleaner } // namespace clang diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h @@ -22,6 +22,7 @@ #ifndef CLANG_INCLUDE_CLEANER_TYPES_H #define CLANG_INCLUDE_CLEANER_TYPES_H +#include "clang/Basic/SourceLocation.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" #include #include @@ -32,41 +33,63 @@ namespace clang { class Decl; class FileEntry; +class IdentifierInfo; namespace include_cleaner { +/// We consider a macro to be a different symbol each time it is defined. +struct Macro { + IdentifierInfo *Name; + /// The location of the Name where the macro is defined. + SourceLocation Definition; + + bool operator==(const Macro &S) const { return Definition == S.Definition; } +}; + /// An entity that can be referenced in the code. struct Symbol { enum Kind { - // A canonical clang declaration. + /// A canonical clang declaration. Declaration, - // A recognized symbol from the standard library, like std::string. + /// A preprocessor macro, as defined in a specific location. + Macro, + /// A recognized symbol from the standard library, like std::string. Standard, }; Symbol(Decl &D) : Storage(&D) {} + Symbol(struct Macro M) : Storage(M) {} Symbol(tooling::stdlib::Symbol S) : Storage(S) {} Kind kind() const { return static_cast(Storage.index()); } bool operator==(const Symbol &RHS) const { return Storage == RHS.Storage; } + Decl &declaration() const { return *std::get(Storage); } + struct Macro macro() const { return std::get(Storage); } tooling::stdlib::Symbol standard() const { return std::get(Storage); } - Decl &declaration() const { return *std::get(Storage); } private: - // FIXME: Add support for macros. - std::variant Storage; + std::variant Storage; }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Symbol &); +/// Indicates that a piece of code refers to a symbol. +struct SymbolReference { + /// The symbol referred to. + Symbol Symbol; + /// The point in the code that refers to the symbol. + SourceLocation RefLocation; +}; +llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolReference &); + /// Represents a file that provides some symbol. Might not be includeable, e.g. /// built-in or main-file itself. struct Header { enum Kind { - // A source file parsed by clang. (May also be a buffer). + /// A source file parsed by clang. (May also be a buffer). Physical, - // A recognized standard library header, like . + /// A recognized standard library header, like . Standard, }; @@ -87,8 +110,17 @@ }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Header &); +/// A single #include directive written in the main file. +struct Include { + llvm::StringRef Spelled; // e.g. vector + const FileEntry *Resolved = nullptr; // e.g. /path/to/c++/v1/vector + // nullptr if the header was not found + SourceLocation HashLocation; // of hash in #include + unsigned Line = 0; // 1-based line number for #include +}; +llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Include &); + } // namespace include_cleaner } // namespace clang #endif - diff --git a/clang-tools-extra/include-cleaner/lib/Record.cpp b/clang-tools-extra/include-cleaner/lib/Record.cpp --- a/clang-tools-extra/include-cleaner/lib/Record.cpp +++ b/clang-tools-extra/include-cleaner/lib/Record.cpp @@ -11,8 +11,95 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclGroup.h" #include "clang/Basic/SourceManager.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Preprocessor.h" namespace clang::include_cleaner { +namespace { + +class PPRecorder : public PPCallbacks { +public: + PPRecorder(RecordedPP &Recorded, const Preprocessor &PP) + : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {} + + void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID) override { + Active = SM.isWrittenInMainFile(Loc); + } + + void InclusionDirective(SourceLocation Hash, const Token &IncludeTok, + StringRef SpelledFilename, bool IsAngled, + CharSourceRange FilenameRange, + llvm::Optional File, + StringRef SearchPath, StringRef RelativePath, + const Module *, SrcMgr::CharacteristicKind) override { + if (!Active) + return; + + Include I; + I.HashLocation = Hash; + I.Resolved = File ? &File->getFileEntry() : nullptr; + I.Line = SM.getSpellingLineNumber(Hash); + I.Spelled = SpelledFilename; + Recorded.Includes.add(I); + } + + void MacroExpands(const Token &MacroName, const MacroDefinition &MD, + SourceRange Range, const MacroArgs *Args) override { + if (!Active) + return; + recordMacroRef(MacroName, *MD.getMacroInfo()); + } + + void MacroDefined(const Token &MacroName, const MacroDirective *MD) override { + if (!Active) + return; + + const auto *MI = MD->getMacroInfo(); + // The tokens of a macro definition could refer to a macro. + // Formally this reference isn't resolved until this macro is expanded, + // but we want to treat it as a reference anyway. + for (const auto &Tok : MI->tokens()) { + auto *II = Tok.getIdentifierInfo(); + // Could this token be a reference to a macro? (Not param to this macro). + if (!II || !II->hadMacroDefinition() || + llvm::is_contained(MI->params(), II)) + continue; + if (const MacroInfo *MI = PP.getMacroInfo(II)) + recordMacroRef(Tok, *MI); + } + } + + void MacroUndefined(const Token &MacroName, const MacroDefinition &MD, + const MacroDirective *) override { + if (!Active) + return; + if (const auto *MI = MD.getMacroInfo()) + recordMacroRef(MacroName, *MI); + } + +private: + void recordMacroRef(const Token &Tok, const MacroInfo &MI) { + if (MI.isBuiltinMacro()) + return; // __FILE__ is not a reference. + Recorded.MacroReferences.push_back( + SymbolReference{Macro{Tok.getIdentifierInfo(), MI.getDefinitionLoc()}, + Tok.getLocation()}); + } + + bool Active = false; + RecordedPP &Recorded; + const Preprocessor &PP; + const SourceManager &SM; +}; + +} // namespace + +std::unique_ptr RecordedPP::record(const Preprocessor &PP) { + return std::make_unique(*this, PP); +} std::unique_ptr RecordedAST::record() { class Recorder : public ASTConsumer { @@ -36,4 +123,31 @@ return std::make_unique(this); } +void RecordedPP::RecordedIncludes::add(const Include &I) { + unsigned Index = All.size(); + All.push_back(I); + auto BySpellingIt = BySpelling.try_emplace(I.Spelled).first; + All.back().Spelled = BySpellingIt->first(); // Now we own the backing string. + + BySpellingIt->second.push_back(Index); + if (I.Resolved) + ByFile[I.Resolved].push_back(Index); +} + +llvm::SmallVector +RecordedPP::RecordedIncludes::match(Header H) const { + llvm::SmallVector Result; + switch (H.kind()) { + case Header::Physical: + for (unsigned I : ByFile.lookup(H.physical())) + Result.push_back(&All[I]); + break; + case Header::Standard: + for (unsigned I : BySpelling.lookup(H.standard().name().trim("<>"))) + Result.push_back(&All[I]); + break; + } + return Result; +} + } // namespace clang::include_cleaner diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp --- a/clang-tools-extra/include-cleaner/lib/Types.cpp +++ b/clang-tools-extra/include-cleaner/lib/Types.cpp @@ -9,6 +9,7 @@ #include "clang-include-cleaner/Types.h" #include "clang/AST/Decl.h" #include "clang/Basic/FileEntry.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" namespace clang::include_cleaner { @@ -19,6 +20,8 @@ if (const auto *ND = llvm::dyn_cast(&S.declaration())) return OS << ND->getNameAsString(); return OS << S.declaration().getDeclKindName(); + case Symbol::Macro: + return OS << S.macro().Name; case Symbol::Standard: return OS << S.standard().scope() << S.standard().name(); } @@ -35,4 +38,18 @@ llvm_unreachable("Unhandled Header kind"); } +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Include &I) { + return OS << I.Line << ": " << I.Spelled << " => " + << (I.Resolved ? I.Resolved->getName() : ""); +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolReference &R) { + // We can't decode the Location without SourceManager. Its raw representation + // isn't completely useless (and distinguishes SymbolReference from Symbol). + return OS << R.Symbol << "@0x" + << llvm::utohexstr(R.RefLocation.getRawEncoding(), + /*Width=*/CHAR_BIT * + sizeof(SourceLocation::UIntTy)); +} + } // namespace clang::include_cleaner diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp --- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp @@ -8,16 +8,23 @@ #include "clang-include-cleaner/Record.h" #include "clang/Frontend/FrontendAction.h" +#include "clang/Frontend/FrontendActions.h" #include "clang/Testing/TestAST.h" +#include "clang/Tooling/Inclusions/StandardLibrary.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Testing/Support/Annotations.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace clang::include_cleaner { namespace { +using testing::ElementsAre; +using testing::ElementsAreArray; +using testing::IsEmpty; // Matches a Decl* if it is a NamedDecl with the given name. -MATCHER_P(Named, N, "") { +MATCHER_P(named, N, "") { if (const NamedDecl *ND = llvm::dyn_cast(arg)) { if (N == ND->getNameAsString()) return true; @@ -63,7 +70,7 @@ } )cpp"; auto AST = build(); - EXPECT_THAT(Recorded.Roots, testing::ElementsAre(Named("ns"))); + EXPECT_THAT(Recorded.Roots, testing::ElementsAre(named("ns"))); } // Decl in included file is not a root. @@ -74,7 +81,7 @@ void mainFunc(); )cpp"; auto AST = build(); - EXPECT_THAT(Recorded.Roots, testing::ElementsAre(Named("mainFunc"))); + EXPECT_THAT(Recorded.Roots, testing::ElementsAre(named("mainFunc"))); } // Decl from macro expanded into the main file is a root. @@ -85,7 +92,147 @@ X )cpp"; auto AST = build(); - EXPECT_THAT(Recorded.Roots, testing::ElementsAre(Named("x"))); + EXPECT_THAT(Recorded.Roots, testing::ElementsAre(named("x"))); +} + +class RecordPPTest : public ::testing::Test { +protected: + TestInputs Inputs; + RecordedPP Recorded; + + RecordPPTest() { + struct RecordAction : public PreprocessOnlyAction { + RecordedPP &Out; + RecordAction(RecordedPP &Out) : Out(Out) {} + + void ExecuteAction() override { + auto &PP = getCompilerInstance().getPreprocessor(); + PP.addPPCallbacks(Out.record(PP)); + PreprocessOnlyAction::ExecuteAction(); + } + }; + Inputs.MakeAction = [this] { + return std::make_unique(Recorded); + }; + } + + TestAST build() { return TestAST(Inputs); } +}; + +// Matches an Include with a particular spelling. +MATCHER_P(spelled, S, "") { return arg.Spelled == S; } + +TEST_F(RecordPPTest, CapturesIncludes) { + llvm::Annotations MainFile(R"cpp( + $H^#include "./header.h" + $M^#include "missing.h" + )cpp"); + Inputs.Code = MainFile.code(); + Inputs.ExtraFiles["header.h"] = ""; + Inputs.ErrorOK = true; // missing header + auto AST = build(); + + ASSERT_THAT( + Recorded.Includes.all(), + testing::ElementsAre(spelled("./header.h"), spelled("missing.h"))); + + auto &H = Recorded.Includes.all().front(); + EXPECT_EQ(H.Line, 2u); + EXPECT_EQ(H.HashLocation, + AST.sourceManager().getComposedLoc( + AST.sourceManager().getMainFileID(), MainFile.point("H"))); + EXPECT_EQ(H.Resolved, AST.fileManager().getFile("header.h").get()); + + auto &M = Recorded.Includes.all().back(); + EXPECT_EQ(M.Line, 3u); + EXPECT_EQ(M.HashLocation, + AST.sourceManager().getComposedLoc( + AST.sourceManager().getMainFileID(), MainFile.point("M"))); + EXPECT_EQ(M.Resolved, nullptr); +} + +TEST_F(RecordPPTest, CapturesMacroRefs) { + llvm::Annotations Header(R"cpp( + #define $def^X 1 + + // Refs, but not in main file. + #define Y X + int one = X; + )cpp"); + llvm::Annotations MainFile(R"cpp( + #define EARLY X // not a ref, no definition + #include "header.h" + #define LATE ^X + #define LATE2 ^X // a ref even if not expanded + + int uno = ^X; + int jeden = $exp^LATE; // a ref in LATE's expansion + + #define IDENT(X) X // not a ref, shadowed + int eins = IDENT(^X); + + #undef ^X + // Not refs, rather a new macro with the same name. + #define X 2 + int two = X; + )cpp"); + Inputs.Code = MainFile.code(); + Inputs.ExtraFiles["header.h"] = Header.code(); + auto AST = build(); + const auto &SM = AST.sourceManager(); + + SourceLocation Def = SM.getComposedLoc( + SM.translateFile(AST.fileManager().getFile("header.h").get()), + Header.point("def")); + ASSERT_THAT(Recorded.MacroReferences, Not(IsEmpty())); + Symbol OrigX = Recorded.MacroReferences.front().Symbol; + EXPECT_EQ("X", OrigX.macro().Name->getName()); + EXPECT_EQ(Def, OrigX.macro().Definition); + + std::vector RefOffsets; + std::vector ExpOffsets; // Expansion locs of refs in macro locs. + std::vector RefMacroLocs; + for (const auto &Ref : Recorded.MacroReferences) { + if (Ref.Symbol == OrigX) { + auto [FID, Off] = SM.getDecomposedLoc(Ref.RefLocation); + if (FID == SM.getMainFileID()) { + RefOffsets.push_back(Off); + } else if (Ref.RefLocation.isMacroID() && + SM.isWrittenInMainFile(SM.getExpansionLoc(Ref.RefLocation))) { + ExpOffsets.push_back( + SM.getDecomposedExpansionLoc(Ref.RefLocation).second); + } else { + ADD_FAILURE() << Ref.RefLocation.printToString(SM); + } + } + } + EXPECT_THAT(RefOffsets, ElementsAreArray(MainFile.points())); + EXPECT_THAT(ExpOffsets, ElementsAreArray(MainFile.points("exp"))); +} + +// Matches an Include* on the specified line; +MATCHER_P(line, N, "") { return arg->Line == (unsigned)N; } + +TEST(RecordedIncludesTest, Match) { + // We're using synthetic data, but need a FileManager to obtain FileEntry*s. + // Ensure it doesn't do any actual IO. + auto FS = llvm::makeIntrusiveRefCnt(); + FileManager FM(FileSystemOptions{}); + const FileEntry *A = FM.getVirtualFile("/path/a", /*Size=*/0, time_t{}); + const FileEntry *B = FM.getVirtualFile("/path/b", /*Size=*/0, time_t{}); + + RecordedPP::RecordedIncludes Includes; + Includes.add(Include{"a", A, SourceLocation(), 1}); + Includes.add(Include{"a2", A, SourceLocation(), 2}); + Includes.add(Include{"b", B, SourceLocation(), 3}); + Includes.add(Include{"vector", B, SourceLocation(), 4}); + Includes.add(Include{"vector", B, SourceLocation(), 5}); + Includes.add(Include{"missing", nullptr, SourceLocation(), 6}); + + EXPECT_THAT(Includes.match(A), ElementsAre(line(1), line(2))); + EXPECT_THAT(Includes.match(B), ElementsAre(line(3), line(4), line(5))); + EXPECT_THAT(Includes.match(*tooling::stdlib::Header::named("")), + ElementsAre(line(4), line(5))); } } // namespace