diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h @@ -26,10 +26,10 @@ /// /// References occur at a particular location, refer to a single symbol, and /// that symbol may be provided by several headers. -/// FIXME: Provide signals about the reference type and providing headers so the -/// caller can filter and rank the results. -using UsedSymbolCB = llvm::function_ref Providers)>; +/// FIXME: Provide signals about the providing headers so the caller can filter +/// and rank the results. +using UsedSymbolCB = llvm::function_ref Providers)>; /// Find and report all references to symbols in a region of code. /// diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h @@ -42,9 +42,7 @@ /// The location of the Name where the macro is defined. SourceLocation Definition; - bool operator==(const Macro &S) const { - return Definition == S.Definition; - } + bool operator==(const Macro &S) const { return Definition == S.Definition; } }; /// An entity that can be referenced in the code. @@ -78,12 +76,24 @@ }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Symbol &); +/// Indicates the relation between the reference and the target. +enum class RefType { + /// Target is named by the reference, e.g. function call. + Explicit, + /// Target isn't spelled, e.g. default constructor call in `Foo f;` + Implicit, + /// Target's use can't be proven, e.g. a candidate for an unresolved overload. + Ambiguous, +}; + /// Indicates that a piece of code refers to a symbol. struct SymbolReference { - /// The symbol referred to. - Symbol Symbol; /// The point in the code that refers to the symbol. SourceLocation RefLocation; + /// The symbol referred to. + Symbol Target; + /// Relation type between the reference location and the target. + RefType RT; }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolReference &); diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp --- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp +++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "clang-include-cleaner/Analysis.h" -#include "clang-include-cleaner/Types.h" #include "AnalysisInternal.h" +#include "clang-include-cleaner/Types.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/SourceManager.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" @@ -32,17 +32,17 @@ tooling::stdlib::Recognizer Recognizer; for (auto *Root : ASTRoots) { auto &SM = Root->getASTContext().getSourceManager(); - walkAST(*Root, [&](SourceLocation Loc, NamedDecl &ND) { + walkAST(*Root, [&](SourceLocation Loc, NamedDecl &ND, RefType RT) { if (auto SS = Recognizer(&ND)) { // FIXME: Also report forward decls from main-file, so that the caller // can decide to insert/ignore a header. - return CB(Loc, Symbol(*SS), toHeader(SS->headers())); + return CB({Loc, Symbol(*SS), RT}, toHeader(SS->headers())); } // FIXME: Extract locations from redecls. // FIXME: Handle IWYU pragmas, non self-contained files. // FIXME: Handle macro locations. if (auto *FE = SM.getFileEntryForID(SM.getFileID(ND.getLocation()))) - return CB(Loc, Symbol(ND), {Header(FE)}); + return CB({Loc, Symbol(ND), RT}, {Header(FE)}); }); } // FIXME: Handle references of macros. diff --git a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h --- a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h +++ b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h @@ -21,6 +21,7 @@ #ifndef CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H #define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H +#include "clang-include-cleaner/Types.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/STLFunctionalExtras.h" @@ -37,10 +38,13 @@ /// the primary location of the AST node found under Root. /// - the NamedDecl is the symbol referenced. It is canonical, rather than e.g. /// the redecl actually found by lookup. +/// - the RefType describes the relation between the SourceLocation and the +/// NamedDecl. /// /// walkAST is typically called once per top-level declaration in the file /// being analyzed, in order to find all references within it. -void walkAST(Decl &Root, llvm::function_ref); +void walkAST(Decl &Root, + llvm::function_ref); /// Write an HTML summary of the analysis to the given stream. /// FIXME: Once analysis has a public API, this should be public too. diff --git a/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp b/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp --- a/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp +++ b/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "AnalysisInternal.h" +#include "clang-include-cleaner/Analysis.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/PrettyPrinter.h" @@ -212,8 +213,9 @@ llvm::raw_ostream &OS) { Reporter R(OS, Ctx, File); for (Decl *Root : Roots) - walkAST(*Root, - [&](SourceLocation Loc, const NamedDecl &D) { R.addRef(Loc, D); }); + walkAST(*Root, [&](SourceLocation Loc, const NamedDecl &D, RefType) { + R.addRef(Loc, D); + }); R.write(); } diff --git a/clang-tools-extra/include-cleaner/lib/Record.cpp b/clang-tools-extra/include-cleaner/lib/Record.cpp --- a/clang-tools-extra/include-cleaner/lib/Record.cpp +++ b/clang-tools-extra/include-cleaner/lib/Record.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang-include-cleaner/Record.h" +#include "clang-include-cleaner/Types.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclGroup.h" @@ -86,8 +87,9 @@ if (MI.isBuiltinMacro()) return; // __FILE__ is not a reference. Recorded.MacroReferences.push_back( - SymbolReference{Macro{Tok.getIdentifierInfo(), MI.getDefinitionLoc()}, - Tok.getLocation()}); + SymbolReference{Tok.getLocation(), + Macro{Tok.getIdentifierInfo(), MI.getDefinitionLoc()}, + RefType::Explicit}); } bool Active = false; @@ -254,5 +256,4 @@ return std::make_unique(*this, PP); } - } // namespace clang::include_cleaner diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp --- a/clang-tools-extra/include-cleaner/lib/Types.cpp +++ b/clang-tools-extra/include-cleaner/lib/Types.cpp @@ -46,7 +46,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolReference &R) { // We can't decode the Location without SourceManager. Its raw representation // isn't completely useless (and distinguishes SymbolReference from Symbol). - return OS << R.Symbol << "@0x" + return OS << R.Target << "@0x" << llvm::utohexstr(R.RefLocation.getRawEncoding(), /*Width=*/CHAR_BIT * sizeof(SourceLocation::UIntTy)); diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp --- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp +++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "AnalysisInternal.h" +#include "clang-include-cleaner/Analysis.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" @@ -16,15 +17,21 @@ #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Casting.h" namespace clang::include_cleaner { namespace { -using DeclCallback = llvm::function_ref; +using DeclCallback = + llvm::function_ref; class ASTWalker : public RecursiveASTVisitor { DeclCallback Callback; + // Whether we're traversing declarations coming from a header file. + // This helps figure out whether certain symbols can be assumed as unused + // (e.g. overloads brought into an implementation file, but not used). + bool IsHeader = false; bool handleTemplateName(SourceLocation Loc, TemplateName TN) { // For using-templates, only mark the alias. @@ -36,14 +43,16 @@ return true; } - void report(SourceLocation Loc, NamedDecl *ND) { + void report(SourceLocation Loc, NamedDecl *ND, + RefType RT = RefType::Explicit) { if (!ND || Loc.isInvalid()) return; - Callback(Loc, *cast(ND->getCanonicalDecl())); + Callback(Loc, *cast(ND->getCanonicalDecl()), RT); } public: - ASTWalker(DeclCallback Callback) : Callback(Callback) {} + ASTWalker(DeclCallback Callback, bool IsHeader) + : Callback(Callback), IsHeader(IsHeader) {} bool VisitDeclRefExpr(DeclRefExpr *DRE) { report(DRE->getLocation(), DRE->getFoundDecl()); @@ -56,23 +65,31 @@ } bool VisitCXXConstructExpr(CXXConstructExpr *E) { - report(E->getLocation(), E->getConstructor()); + report(E->getLocation(), E->getConstructor(), + E->getParenOrBraceRange().isValid() ? RefType::Explicit + : RefType::Implicit); return true; } bool VisitOverloadExpr(OverloadExpr *E) { // Since we can't prove which overloads are used, report all of them. - // FIXME: Provide caller with the ability to make a decision for such uses. - llvm::for_each(E->decls(), - [this, E](NamedDecl *D) { report(E->getNameLoc(), D); }); + llvm::for_each(E->decls(), [this, E](NamedDecl *D) { + report(E->getNameLoc(), D, RefType::Ambiguous); + }); return true; } bool VisitUsingDecl(UsingDecl *UD) { - // FIXME: Provide caller with the ability to tell apart used/non-used - // targets. - for (const auto *Shadow : UD->shadows()) - report(UD->getLocation(), Shadow->getTargetDecl()); + for (const auto *Shadow : UD->shadows()) { + auto *TD = Shadow->getTargetDecl(); + auto IsUsed = TD->isUsed() || TD->isReferenced(); + // We ignore unused overloads inside implementation files, as the ones in + // headers might still be used by the dependents of the header. + if (!IsUsed && !IsHeader) + continue; + report(UD->getLocation(), TD, + IsUsed ? RefType::Explicit : RefType::Ambiguous); + } return true; } @@ -135,7 +152,14 @@ } // namespace void walkAST(Decl &Root, DeclCallback Callback) { - ASTWalker(Callback).TraverseDecl(&Root); + auto &AST = Root.getASTContext(); + auto &SM = AST.getSourceManager(); + // If decl isn't written in main file, assume it's coming from an include, + // hence written in a header. + bool IsRootedAtHeader = + AST.getLangOpts().IsHeaderFile || + !SM.isWrittenInMainFile(SM.getSpellingLoc(Root.getLocation())); + ASTWalker(Callback, IsRootedAtHeader).TraverseDecl(&Root); } } // namespace clang::include_cleaner diff --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp --- a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp @@ -50,12 +50,12 @@ auto &SM = AST.sourceManager(); llvm::DenseMap> OffsetToProviders; - walkUsed(TopLevelDecls, [&](SourceLocation RefLoc, Symbol S, - llvm::ArrayRef
Providers) { - auto [FID, Offset] = SM.getDecomposedLoc(RefLoc); - EXPECT_EQ(FID, SM.getMainFileID()); - OffsetToProviders.try_emplace(Offset, Providers.vec()); - }); + walkUsed(TopLevelDecls, + [&](SymbolReference SymRef, llvm::ArrayRef
Providers) { + auto [FID, Offset] = SM.getDecomposedLoc(SymRef.RefLocation); + EXPECT_EQ(FID, SM.getMainFileID()); + OffsetToProviders.try_emplace(Offset, Providers.vec()); + }); auto HeaderFile = Header(AST.fileManager().getFile("header.h").get()); auto MainFile = Header(SM.getFileEntryForID(SM.getMainFileID())); auto VectorSTL = Header(tooling::stdlib::Header::named("").value()); diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp --- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp @@ -185,7 +185,7 @@ SM.translateFile(AST.fileManager().getFile("header.h").get()), Header.point("def")); ASSERT_THAT(Recorded.MacroReferences, Not(IsEmpty())); - Symbol OrigX = Recorded.MacroReferences.front().Symbol; + Symbol OrigX = Recorded.MacroReferences.front().Target; EXPECT_EQ("X", OrigX.macro().Name->getName()); EXPECT_EQ(Def, OrigX.macro().Definition); @@ -193,7 +193,7 @@ std::vector ExpOffsets; // Expansion locs of refs in macro locs. std::vector RefMacroLocs; for (const auto &Ref : Recorded.MacroReferences) { - if (Ref.Symbol == OrigX) { + if (Ref.Target == OrigX) { auto [FID, Off] = SM.getDecomposedLoc(Ref.RefLocation); if (FID == SM.getMainFileID()) { RefOffsets.push_back(Off); diff --git a/clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp b/clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp --- a/clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp @@ -12,11 +12,15 @@ #include "clang/Frontend/TextDiagnostic.h" #include "clang/Testing/TestAST.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Testing/Support/Annotations.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include +#include +#include #include namespace clang::include_cleaner { @@ -24,10 +28,23 @@ using testing::Pair; using testing::UnorderedElementsAre; +llvm::StringLiteral to_string(RefType RT) { + switch (RT) { + case RefType::Explicit: + return "explicit"; + case RefType::Implicit: + return "implicit"; + case RefType::Ambiguous: + return "ambiguous"; + } + llvm_unreachable("Unexpected RefType"); +} + // Specifies a test of which symbols are referenced by a piece of code. -// +// If `// c++-header` is present, treats referencing code as a header file. +// Target should contain points annotated with the reference kind. // Example: -// Target: int ^foo(); +// Target: int $explicit^foo(); // Referencing: int x = ^foo(); // There must be exactly one referencing location marked. void testWalk(llvm::StringRef TargetCode, llvm::StringRef ReferencingCode) { @@ -39,6 +56,8 @@ Inputs.ExtraArgs.push_back("-include"); Inputs.ExtraArgs.push_back("target.h"); Inputs.ExtraArgs.push_back("-std=c++17"); + if (Referencing.code().contains("// c++-header\n")) + Inputs.ExtraArgs.push_back("-xc++-header"); TestAST AST(Inputs); const auto &SM = AST.sourceManager(); @@ -51,20 +70,21 @@ llvm::cantFail(AST.fileManager().getFileRef("target.h"))); // Perform the walk, and capture the offsets of the referenced targets. - std::vector ReferencedOffsets; + std::unordered_map> ReferencedOffsets; for (Decl *D : AST.context().getTranslationUnitDecl()->decls()) { if (ReferencingFile != SM.getDecomposedExpansionLoc(D->getLocation()).first) continue; - walkAST(*D, [&](SourceLocation Loc, NamedDecl &ND) { + walkAST(*D, [&](SourceLocation Loc, NamedDecl &ND, RefType RT) { if (SM.getFileLoc(Loc) != ReferencingLoc) return; auto NDLoc = SM.getDecomposedLoc(SM.getFileLoc(ND.getLocation())); if (NDLoc.first != TargetFile) return; - ReferencedOffsets.push_back(NDLoc.second); + ReferencedOffsets[RT].push_back(NDLoc.second); }); } - llvm::sort(ReferencedOffsets); + for (auto &Entry : ReferencedOffsets) + llvm::sort(Entry.second); // Compare results to the expected points. // For each difference, show the target point in context, like a diagnostic. @@ -74,17 +94,22 @@ DiagOpts->ShowLevel = 0; DiagOpts->ShowNoteIncludeStack = 0; TextDiagnostic Diag(DiagOS, AST.context().getLangOpts(), DiagOpts); - auto DiagnosePoint = [&](const char *Message, unsigned Offset) { + auto DiagnosePoint = [&](llvm::StringRef Message, unsigned Offset) { Diag.emitDiagnostic( FullSourceLoc(SM.getComposedLoc(TargetFile, Offset), SM), DiagnosticsEngine::Note, Message, {}, {}); }; - for (auto Expected : Target.points()) - if (!llvm::is_contained(ReferencedOffsets, Expected)) - DiagnosePoint("location not marked used", Expected); - for (auto Actual : ReferencedOffsets) - if (!llvm::is_contained(Target.points(), Actual)) - DiagnosePoint("location unexpectedly used", Actual); + for (auto RT : {RefType::Explicit, RefType::Implicit, RefType::Ambiguous}) { + auto RTStr = to_string(RT); + for (auto Expected : Target.points(RTStr)) + if (!llvm::is_contained(ReferencedOffsets[RT], Expected)) + DiagnosePoint(("location not marked used with type " + RTStr).str(), + Expected); + for (auto Actual : ReferencedOffsets[RT]) + if (!llvm::is_contained(Target.points(RTStr), Actual)) + DiagnosePoint(("location unexpectedly used with type " + RTStr).str(), + Actual); + } // If there were any differences, we print the entire referencing code once. if (!DiagBuf.empty()) @@ -92,35 +117,46 @@ } TEST(WalkAST, DeclRef) { - testWalk("int ^x;", "int y = ^x;"); - testWalk("int ^foo();", "int y = ^foo();"); - testWalk("namespace ns { int ^x; }", "int y = ns::^x;"); - testWalk("struct S { static int ^x; };", "int y = S::^x;"); + testWalk("int $explicit^x;", "int y = ^x;"); + testWalk("int $explicit^foo();", "int y = ^foo();"); + testWalk("namespace ns { int $explicit^x; }", "int y = ns::^x;"); + testWalk("struct S { static int $explicit^x; };", "int y = S::^x;"); // Canonical declaration only. - testWalk("extern int ^x; int x;", "int y = ^x;"); + testWalk("extern int $explicit^x; int x;", "int y = ^x;"); // Return type of `foo` isn't used. - testWalk("struct S{}; S ^foo();", "auto bar() { return ^foo(); }"); + testWalk("struct S{}; S $explicit^foo();", "auto bar() { return ^foo(); }"); } TEST(WalkAST, TagType) { - testWalk("struct ^S {};", "^S *y;"); - testWalk("enum ^E {};", "^E *y;"); - testWalk("struct ^S { static int x; };", "int y = ^S::x;"); + testWalk("struct $explicit^S {};", "^S *y;"); + testWalk("enum $explicit^E {};", "^E *y;"); + testWalk("struct $explicit^S { static int x; };", "int y = ^S::x;"); } TEST(WalkAST, Alias) { testWalk(R"cpp( namespace ns { int x; } - using ns::^x; + using ns::$explicit^x; )cpp", "int y = ^x;"); - testWalk("using ^foo = int;", "^foo x;"); - testWalk("struct S {}; using ^foo = S;", "^foo x;"); + testWalk("using $explicit^foo = int;", "^foo x;"); + testWalk("struct S {}; using $explicit^foo = S;", "^foo x;"); } TEST(WalkAST, Using) { - testWalk("namespace ns { void ^x(); void ^x(int); }", "using ns::^x;"); - testWalk("namespace ns { struct S; } using ns::^S;", "^S *s;"); + // Make sure we ignore unused overloads. + testWalk(R"cpp( + namespace ns { + void $explicit^x(); void x(int); void x(char); + })cpp", + "using ns::^x; void foo() { x(); }"); + // We should report unused overloads if main file is a header. + testWalk(R"cpp( + namespace ns { + void $ambiguous^x(); void $ambiguous^x(int); void $ambiguous^x(char); + })cpp", + "// c++-header\n using ns::^x;"); + testWalk("namespace ns { struct S; } using ns::$explicit^S;", "^S *s;"); } TEST(WalkAST, Namespaces) { @@ -128,51 +164,55 @@ } TEST(WalkAST, TemplateNames) { - testWalk("template struct ^S {};", "^S s;"); + testWalk("template struct $explicit^S {};", "^S s;"); // FIXME: Template decl has the wrong primary location for type-alias template // decls. testWalk(R"cpp( template struct S {}; - template ^using foo = S;)cpp", + template $explicit^using foo = S;)cpp", "^foo x;"); testWalk(R"cpp( namespace ns {template struct S {}; } - using ns::^S;)cpp", + using ns::$explicit^S;)cpp", "^S x;"); - testWalk("template struct ^S {};", + testWalk("template struct $explicit^S {};", R"cpp( template