diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt --- a/clang-tools-extra/CMakeLists.txt +++ b/clang-tools-extra/CMakeLists.txt @@ -14,6 +14,7 @@ add_subdirectory(clang-include-fixer) add_subdirectory(clang-move) add_subdirectory(clang-query) +add_subdirectory(include-cleaner) add_subdirectory(pp-trace) add_subdirectory(pseudo) add_subdirectory(tool-template) diff --git a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt --- a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt @@ -1,3 +1,5 @@ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../include-cleaner/include) + set(LLVM_LINK_COMPONENTS FrontendOpenMP Support @@ -19,6 +21,7 @@ UnconventionalAssignOperatorCheck.cpp UniqueptrResetReleaseCheck.cpp UnusedAliasDeclsCheck.cpp + UnusedIncludesCheck.cpp UnusedParametersCheck.cpp UnusedUsingDeclsCheck.cpp @@ -36,6 +39,7 @@ clangAST clangASTMatchers clangBasic + clangIncludeCleaner clangLex clangSerialization clangTooling diff --git a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp --- a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp @@ -23,6 +23,7 @@ #include "UnconventionalAssignOperatorCheck.h" #include "UniqueptrResetReleaseCheck.h" #include "UnusedAliasDeclsCheck.h" +#include "UnusedIncludesCheck.h" #include "UnusedParametersCheck.h" #include "UnusedUsingDeclsCheck.h" @@ -58,6 +59,7 @@ "misc-uniqueptr-reset-release"); CheckFactories.registerCheck( "misc-unused-alias-decls"); + CheckFactories.registerCheck("misc-unused-includes"); CheckFactories.registerCheck( "misc-unused-parameters"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/clang-tidy/misc/UnusedIncludesCheck.h b/clang-tools-extra/clang-tidy/misc/UnusedIncludesCheck.h new file mode 100644 --- /dev/null +++ 
b/clang-tools-extra/clang-tidy/misc/UnusedIncludesCheck.h @@ -0,0 +1,42 @@ +//===--- UnusedIncludesCheck.h - clang-tidy----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace include_cleaner { +class AnalysisContext; +struct RecordedPP; +} // namespace include_cleaner +namespace tidy { +namespace misc { + +class UnusedIncludesCheck : public ClangTidyCheck { +public: + UnusedIncludesCheck(StringRef Name, ClangTidyContext *Context); + ~UnusedIncludesCheck(); + void registerPPCallbacks(const SourceManager &SM, Preprocessor *, + Preprocessor *) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + void onEndOfTranslationUnit() override; + +private: + std::unique_ptr Ctx; + std::unique_ptr RecordedPP; + std::vector Top; +}; + +} // namespace misc +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H diff --git a/clang-tools-extra/clang-tidy/misc/UnusedIncludesCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnusedIncludesCheck.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clang-tidy/misc/UnusedIncludesCheck.cpp @@ -0,0 +1,78 @@ +//===--- UnusedIncludesCheck.cpp - clang-tidy------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UnusedIncludesCheck.h" +#include "clang-include-cleaner/Analysis.h" +#include "clang-include-cleaner/Hooks.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Preprocessor.h" + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace misc { + +UnusedIncludesCheck::UnusedIncludesCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + +void UnusedIncludesCheck::registerPPCallbacks(const SourceManager &SM, + Preprocessor *PP, + Preprocessor *) { + Ctx = std::make_unique( + include_cleaner::Policy{}, *PP); + RecordedPP = std::make_unique(); + PP->addPPCallbacks(RecordedPP->record(*Ctx)); +} + +void UnusedIncludesCheck::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher( + translationUnitDecl(forEach(decl(isExpansionInMainFile()).bind("top"))), + this); +} + +void UnusedIncludesCheck::check(const MatchFinder::MatchResult &Result) { + Top.push_back(const_cast(Result.Nodes.getNodeAs("top"))); +} + +void UnusedIncludesCheck::onEndOfTranslationUnit() { + llvm::DenseSet Used; + llvm::DenseSet Seen; + include_cleaner::walkUsed( + *Ctx, Top, RecordedPP->MacroReferences, + [&](SourceLocation Loc, include_cleaner::Symbol Sym, + llvm::ArrayRef Headers) { + for (const auto &Header : Headers) { + if (!Seen.insert(Header).second) + continue; + for (const auto *I : RecordedPP->Includes.match(Header)) + Used.insert(I); + } + }); + for (const auto &I : RecordedPP->Includes.all()) { + if (!Used.contains(&I)) { + const auto &SM = Ctx->sourceManager(); + FileID FID = SM.getFileID(I.Location); + diag(I.Location, "include is unused") + << FixItHint::CreateRemoval(CharSourceRange::getCharRange( + SM.translateLineCol(FID, I.Line, 1), + SM.translateLineCol(FID, I.Line + 1, 1))); + } 
else { + diag(I.Location, "include is used!"); + } + } +} + +UnusedIncludesCheck::~UnusedIncludesCheck() = default; + +} // namespace misc +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -2,6 +2,8 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include-cleaner/include) + add_subdirectory(support) # Configure the Features.inc file. @@ -152,6 +154,7 @@ clangDriver clangFormat clangFrontend + clangIncludeCleaner clangIndex clangLex clangSema diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -12,9 +12,11 @@ #include "CodeCompletionStrings.h" #include "Config.h" #include "FindTarget.h" +#include "IncludeCleaner.h" #include "ParsedAST.h" #include "Selection.h" #include "SourceCode.h" +#include "clang-include-cleaner/Analysis.h" #include "index/SymbolCollector.h" #include "support/Markup.h" #include "clang/AST/ASTContext.h" @@ -987,6 +989,23 @@ // FIXME: We don't have a fitting value for Kind. HI.Definition = URIForFile::canonicalize(Inc.Resolved, *MainFilePath).file().str(); + + // FIXME: share code, macros too... 
+ include_cleaner::AnalysisContext Ctx(include_cleaner::Policy{}, + AST.getPreprocessor()); + std::vector Provides; + include_cleaner::walkUsed( + Ctx, AST.getLocalTopLevelDecls(), /*Macros=*/{}, + [&](SourceLocation Loc, include_cleaner::Symbol S, + llvm::ArrayRef Headers) { + for (const auto &H : Headers) + if (match(H, Inc, AST.getIncludeStructure())) + Provides.push_back(S.name()); + }); + llvm::sort(Provides); + Provides.erase(std::unique(Provides.begin(), Provides.end()), + Provides.end()); + HI.Documentation = "provides " + llvm::join(Provides, ", "); HI.DefinitionLanguage = ""; return HI; } diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h --- a/clang-tools-extra/clangd/IncludeCleaner.h +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -23,62 +23,13 @@ #include "Headers.h" #include "ParsedAST.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Tooling/Inclusions/StandardLibrary.h" +#include "clang-include-cleaner/Types.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLFunctionalExtras.h" #include namespace clang { namespace clangd { -struct ReferencedLocations { - llvm::DenseSet User; - llvm::DenseSet Stdlib; -}; - -/// Finds locations of all symbols used in the main file. -/// -/// - RecursiveASTVisitor finds references to symbols and records their -/// associated locations. These may be macro expansions, and are not resolved -/// to their spelling or expansion location. These locations are later used to -/// determine which headers should be marked as "used" and "directly used". -/// - If \p Tokens is not nullptr, we also examine all identifier tokens in the -/// file in case they reference macros macros. -/// We use this to compute unused headers, so we: -/// -/// - cover the whole file in a single traversal for efficiency -/// - don't attempt to describe where symbols were referenced from in -/// ambiguous cases (e.g. 
implicitly used symbols, multiple declarations) -/// - err on the side of reporting all possible locations -ReferencedLocations findReferencedLocations(ASTContext &Ctx, Preprocessor &PP, - const syntax::TokenBuffer *Tokens); -ReferencedLocations findReferencedLocations(ParsedAST &AST); - -struct ReferencedFiles { - llvm::DenseSet User; - llvm::DenseSet Stdlib; -}; - -/// Retrieves IDs of all files containing SourceLocations from \p Locs. -/// The output only includes things SourceManager sees as files (not macro IDs). -/// This can include , etc that are not true files. -/// \p HeaderResponsible returns the public header that should be included given -/// symbols from a file with the given FileID (example: public headers should be -/// preferred to non self-contained and private headers). -ReferencedFiles -findReferencedFiles(const ReferencedLocations &Locs, const SourceManager &SM, - llvm::function_ref HeaderResponsible); -ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs, - const IncludeStructure &Includes, - const SourceManager &SM); - -/// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs). -/// FileIDs that are not true files ( etc) are dropped. -llvm::DenseSet -translateToHeaderIDs(const ReferencedFiles &Files, - const IncludeStructure &Includes, const SourceManager &SM); - /// Retrieves headers that are referenced from the main file but not used. /// In unclear cases, headers are not marked as unused. std::vector @@ -90,6 +41,10 @@ std::vector issueUnusedIncludesDiagnostics(ParsedAST &AST, llvm::StringRef Code); +// Does an include-cleaner header spec match a clangd recorded inclusion? +bool match(const include_cleaner::Header &H, const Inclusion &I, + const IncludeStructure &S); + /// Affects whether standard library includes should be considered for /// removal. 
This is off by default for now due to implementation limitations: /// - macros are not tracked diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -12,11 +12,10 @@ #include "ParsedAST.h" #include "Protocol.h" #include "SourceCode.h" +#include "clang-include-cleaner/Analysis.h" +#include "clang-include-cleaner/Types.h" #include "support/Logger.h" #include "support/Trace.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/ExprCXX.h" -#include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/HeaderSearch.h" @@ -34,163 +33,6 @@ namespace { -/// Crawler traverses the AST and feeds in the locations of (sometimes -/// implicitly) used symbols into \p Result. -class ReferencedLocationCrawler - : public RecursiveASTVisitor { -public: - ReferencedLocationCrawler(ReferencedLocations &Result, - const SourceManager &SM) - : Result(Result), SM(SM) {} - - bool VisitDeclRefExpr(DeclRefExpr *DRE) { - add(DRE->getDecl()); - add(DRE->getFoundDecl()); - return true; - } - - bool VisitMemberExpr(MemberExpr *ME) { - add(ME->getMemberDecl()); - add(ME->getFoundDecl().getDecl()); - return true; - } - - bool VisitTagType(TagType *TT) { - add(TT->getDecl()); - return true; - } - - bool VisitFunctionDecl(FunctionDecl *FD) { - // Function definition will require redeclarations to be included. - if (FD->isThisDeclarationADefinition()) - add(FD); - return true; - } - - bool VisitCXXConstructExpr(CXXConstructExpr *CCE) { - add(CCE->getConstructor()); - return true; - } - - bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { - add(TST->getTemplateName().getAsTemplateDecl()); // Primary template. 
- add(TST->getAsCXXRecordDecl()); // Specialization - return true; - } - - bool VisitUsingType(UsingType *UT) { - add(UT->getFoundDecl()); - return true; - } - - bool VisitTypedefType(TypedefType *TT) { - add(TT->getDecl()); - return true; - } - - // Consider types of any subexpression used, even if the type is not named. - // This is helpful in getFoo().bar(), where Foo must be complete. - // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to - // consider types "used" when they are not directly spelled in code. - bool VisitExpr(Expr *E) { - TraverseType(E->getType()); - return true; - } - - bool TraverseType(QualType T) { - if (isNew(T.getTypePtrOrNull())) // don't care about quals - Base::TraverseType(T); - return true; - } - - bool VisitUsingDecl(UsingDecl *D) { - for (const auto *Shadow : D->shadows()) - add(Shadow->getTargetDecl()); - return true; - } - - // Enums may be usefully forward-declared as *complete* types by specifying - // an underlying type. In this case, the definition should see the declaration - // so they can be checked for compatibility. - bool VisitEnumDecl(EnumDecl *D) { - if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo()) - add(D); - return true; - } - - // When the overload is not resolved yet, mark all candidates as used. - bool VisitOverloadExpr(OverloadExpr *E) { - for (const auto *ResolutionDecl : E->decls()) - add(ResolutionDecl); - return true; - } - -private: - using Base = RecursiveASTVisitor; - - void add(const Decl *D) { - if (!D || !isNew(D->getCanonicalDecl())) - return; - if (auto SS = StdRecognizer(D)) { - Result.Stdlib.insert(*SS); - return; - } - // Special case RecordDecls, as it is common for them to be forward - // declared multiple times. The most common cases are: - // - Definition available in TU, only mark that one as usage. The rest is - // likely to be unnecessary. This might result in false positives when an - // internal definition is visible. 
- // - There's a forward declaration in the main file, no need for other - // redecls. - if (const auto *RD = llvm::dyn_cast(D)) { - if (const auto *Definition = RD->getDefinition()) { - Result.User.insert(Definition->getLocation()); - return; - } - if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation())) - return; - } - for (const Decl *Redecl : D->redecls()) - Result.User.insert(Redecl->getLocation()); - } - - bool isNew(const void *P) { return P && Visited.insert(P).second; } - - ReferencedLocations &Result; - llvm::DenseSet Visited; - const SourceManager &SM; - tooling::stdlib::Recognizer StdRecognizer; -}; - -// Given a set of referenced FileIDs, determines all the potentially-referenced -// files and macros by traversing expansion/spelling locations of macro IDs. -// This is used to map the referenced SourceLocations onto real files. -struct ReferencedFilesBuilder { - ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {} - llvm::DenseSet Files; - llvm::DenseSet Macros; - const SourceManager &SM; - - void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); } - - void add(FileID FID, SourceLocation Loc) { - if (FID.isInvalid()) - return; - assert(SM.isInFileID(Loc, FID)); - if (Loc.isFileID()) { - Files.insert(FID); - return; - } - // Don't process the same macro FID twice. - if (!Macros.insert(FID).second) - return; - const auto &Exp = SM.getSLocEntry(FID).getExpansion(); - add(Exp.getSpellingLoc()); - add(Exp.getExpansionLocStart()); - add(Exp.getExpansionLocEnd()); - } -}; - // Returns the range starting at '#' and ending at EOL. Escaped newlines are not // handled. clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) { @@ -207,10 +49,10 @@ // Finds locations of macros referenced from within the main file. That includes // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`. 
-void findReferencedMacros(const SourceManager &SM, Preprocessor &PP, - const syntax::TokenBuffer *Tokens, - ReferencedLocations &Result) { +std::vector +findReferencedMacros(ParsedAST &AST, include_cleaner::AnalysisContext &Ctx) { trace::Span Tracer("IncludeCleaner::findReferencedMacros"); + std::vector Result; // FIXME(kirillbobyrev): The macros from the main file are collected in // ParsedAST's MainFileMacros. However, we can't use it here because it // doesn't handle macro references that were not expanded, e.g. in macro @@ -220,15 +62,19 @@ // this mechanism (as opposed to iterating through all tokens) will improve // the performance of findReferencedMacros and also improve other features // relying on MainFileMacros. - for (const syntax::Token &Tok : Tokens->spelledTokens(SM.getMainFileID())) { - auto Macro = locateMacroAt(Tok, PP); + for (const syntax::Token &Tok : + AST.getTokens().spelledTokens(AST.getSourceManager().getMainFileID())) { + auto Macro = locateMacroAt(Tok, AST.getPreprocessor()); if (!Macro) continue; auto Loc = Macro->Info->getDefinitionLoc(); if (Loc.isValid()) - Result.User.insert(Loc); - // FIXME: support stdlib macros + Result.push_back(include_cleaner::SymbolReference{ + Tok.location(), + Ctx.macro(AST.getPreprocessor().getIdentifierInfo(Macro->Name), + Loc)}); } + return Result; } static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) { @@ -259,90 +105,8 @@ return true; } -// In case symbols are coming from non self-contained header, we need to find -// its first includer that is self-contained. This is the header users can -// include, so it will be responsible for bringing the symbols from given -// header into the scope. -FileID headerResponsible(FileID ID, const SourceManager &SM, - const IncludeStructure &Includes) { - // Unroll the chain of non self-contained headers until we find the one that - // can be included. 
- for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID(); - FE = SM.getFileEntryForID(ID)) { - // If FE is nullptr, we consider it to be the responsible header. - if (!FE) - break; - auto HID = Includes.getID(FE); - assert(HID && "We're iterating over headers already existing in " - "IncludeStructure"); - if (Includes.isSelfContained(*HID)) - break; - // The header is not self-contained: put the responsibility for its symbols - // on its includer. - ID = SM.getFileID(SM.getIncludeLoc(ID)); - } - return ID; -} - } // namespace -ReferencedLocations findReferencedLocations(ASTContext &Ctx, Preprocessor &PP, - const syntax::TokenBuffer *Tokens) { - trace::Span Tracer("IncludeCleaner::findReferencedLocations"); - ReferencedLocations Result; - const auto &SM = Ctx.getSourceManager(); - ReferencedLocationCrawler Crawler(Result, SM); - Crawler.TraverseAST(Ctx); - if (Tokens) - findReferencedMacros(SM, PP, Tokens, Result); - return Result; -} - -ReferencedLocations findReferencedLocations(ParsedAST &AST) { - return findReferencedLocations(AST.getASTContext(), AST.getPreprocessor(), - &AST.getTokens()); -} - -ReferencedFiles -findReferencedFiles(const ReferencedLocations &Locs, const SourceManager &SM, - llvm::function_ref HeaderResponsible) { - std::vector Sorted{Locs.User.begin(), Locs.User.end()}; - llvm::sort(Sorted); // Group by FileID. - ReferencedFilesBuilder Builder(SM); - for (auto It = Sorted.begin(); It < Sorted.end();) { - FileID FID = SM.getFileID(*It); - Builder.add(FID, *It); - // Cheaply skip over all the other locations from the same FileID. - // This avoids lots of redundant Loc->File lookups for the same file. - do - ++It; - while (It != Sorted.end() && SM.isInFileID(*It, FID)); - } - - // If a header is not self-contained, we consider its symbols a logical part - // of the including file. Therefore, mark the parents of all used - // non-self-contained FileIDs as used. 
Perform this on FileIDs rather than - // HeaderIDs, as each inclusion of a non-self-contained file is distinct. - llvm::DenseSet UserFiles; - for (FileID ID : Builder.Files) - UserFiles.insert(HeaderResponsible(ID)); - - llvm::DenseSet StdlibFiles; - for (const auto &Symbol : Locs.Stdlib) - for (const auto &Header : Symbol.headers()) - StdlibFiles.insert(Header); - - return {std::move(UserFiles), std::move(StdlibFiles)}; -} - -ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs, - const IncludeStructure &Includes, - const SourceManager &SM) { - return findReferencedFiles(Locs, SM, [&SM, &Includes](FileID ID) { - return headerResponsible(ID, SM, Includes); - }); -} - std::vector getUnused(ParsedAST &AST, const llvm::DenseSet &ReferencedFiles) { @@ -365,46 +129,45 @@ return Unused; } -#ifndef NDEBUG -// Is FID a , etc? -static bool isSpecialBuffer(FileID FID, const SourceManager &SM) { - const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile(); - return FI.getName().startswith("<"); -} -#endif - -llvm::DenseSet -translateToHeaderIDs(const ReferencedFiles &Files, - const IncludeStructure &Includes, - const SourceManager &SM) { - trace::Span Tracer("IncludeCleaner::translateToHeaderIDs"); - llvm::DenseSet TranslatedHeaderIDs; - TranslatedHeaderIDs.reserve(Files.User.size()); - for (FileID FID : Files.User) { - const FileEntry *FE = SM.getFileEntryForID(FID); - if (!FE) { - assert(isSpecialBuffer(FID, SM)); - continue; - } - const auto File = Includes.getID(FE); - assert(File); - TranslatedHeaderIDs.insert(*File); - } - for (tooling::stdlib::Header StdlibUsed : Files.Stdlib) - for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed)) - TranslatedHeaderIDs.insert(HID); - return TranslatedHeaderIDs; +bool match(const include_cleaner::Header &H, const Inclusion &I, + const IncludeStructure &S) { + switch (H.kind()) { + case include_cleaner::Header::Physical: + if (auto HID = S.getID(H.getPhysical())) + if (static_cast(*HID) == I.HeaderID) + return true; 
+ break; + case include_cleaner::Header::StandardLibrary: + return I.Written == H.getStandardLibrary().name(); + case include_cleaner::Header::Verbatim: + return llvm::StringRef(I.Written).trim("\"<>") == H.getVerbatimSpelling(); + case include_cleaner::Header::Builtin: + case include_cleaner::Header::MainFile: + break; + } + return false; } std::vector computeUnusedIncludes(ParsedAST &AST) { - const auto &SM = AST.getSourceManager(); - - auto Refs = findReferencedLocations(AST); - auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(), - AST.getSourceManager()); - auto ReferencedHeaders = - translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM); - return getUnused(AST, ReferencedHeaders); + include_cleaner::AnalysisContext Ctx(include_cleaner::Policy{}, + AST.getPreprocessor()); + llvm::DenseSet Used; + include_cleaner::walkUsed( + Ctx, AST.getLocalTopLevelDecls(), + /*MacroRefs=*/findReferencedMacros(AST, Ctx), + [&](SourceLocation Loc, include_cleaner::Symbol Sym, + llvm::ArrayRef Headers) { + for (const auto &I : AST.getIncludeStructure().MainFileIncludes) + for (const auto &H : Headers) + if (match(H, I, AST.getIncludeStructure())) + Used.insert(&I); + }); + std::vector Unused; + for (const auto &I : AST.getIncludeStructure().MainFileIncludes) { + if (!Used.contains(&I) && mayConsiderUnused(I, AST)) + Unused.push_back(&I); + } + return Unused; } std::vector issueUnusedIncludesDiagnostics(ParsedAST &AST, diff --git a/clang-tools-extra/include-cleaner/CMakeLists.txt b/clang-tools-extra/include-cleaner/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/clang-tools-extra/include-cleaner/CMakeLists.txt @@ -0,0 +1,5 @@ +include_directories(include) +include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) +add_subdirectory(lib) +add_subdirectory(tool) + diff --git a/clang-tools-extra/include-cleaner/README.md b/clang-tools-extra/include-cleaner/README.md new file mode 100644 --- /dev/null +++ 
b/clang-tools-extra/include-cleaner/README.md @@ -0,0 +1,5 @@ +# include-cleaner + +This is intended as a library, to be embedded in clang-tidy, clangd and other +tools. Nevertheless a standalone clang-include-cleaner based on libTooling is +provided. diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h @@ -0,0 +1,59 @@ +//===--- Analysis.h - Analyze used files --------------------------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_INCLUDE_CLEANER_ANALYSIS_H +#define CLANG_INCLUDE_CLEANER_ANALYSIS_H + +#include "clang-include-cleaner/Policy.h" +#include "clang-include-cleaner/Types.h" + +namespace clang { +namespace include_cleaner { +class Cache; + +class AnalysisContext { +public: + AnalysisContext(const Policy &, const Preprocessor &); + AnalysisContext(AnalysisContext &&) = delete; + AnalysisContext &operator=(AnalysisContext &&) = delete; + ~AnalysisContext(); + + const SourceManager &sourceManager() const { return *SM; } + const Preprocessor &preprocessor() const { return *PP; } + const Policy &policy() const { return P; } + + Cache &cache() { return *C; } + // FIXME: does this need to be public? + Symbol macro(const IdentifierInfo *, SourceLocation); + +private: + Policy P; + const SourceManager *SM; + const Preprocessor *PP; + std::unique_ptr C; +}; + +// A UsedSymbolVisitor is a callback invoked for each symbol reference seen. 
+// References occur at a particular location, refer to a single symbol, and +// that symbol may be provided by any of several headers. +using UsedSymbolVisitor = + llvm::function_ref ProvidedBy)>; + +// Find and report all references to symbols in a region of code. +// The AST traversal is rooted at ASTRoots - typically top-level declarations +// of a single source file. MacroRefs are additional recorded references to +// macros, which do not appear in the AST. +void walkUsed(AnalysisContext &, llvm::ArrayRef ASTRoots, + llvm::ArrayRef MacroRefs, + UsedSymbolVisitor Callback); + +} // namespace include_cleaner +} // namespace clang + +#endif diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h @@ -0,0 +1,60 @@ +//===--- Hooks.h - Record compiler events -------------------------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_INCLUDE_CLEANER_HOOKS_H +#define CLANG_INCLUDE_CLEANER_HOOKS_H + +#include "Analysis.h" +#include "Types.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include + +namespace clang { +class FileEntry; +class PPCallbacks; +namespace include_cleaner { +class PPRecorder; + +struct RecordedPP { + struct Include { + llvm::StringRef Spelled; + const FileEntry *Resolved; + SourceLocation Location; + unsigned Line; + }; + class RecordedIncludes { + public: + llvm::ArrayRef all() const { return All; } + llvm::SmallVector match(Header H) const; + + private: + std::vector All; + llvm::StringMap> BySpelling; + llvm::DenseMap> ByFile; + friend PPRecorder; + } Includes; + + std::vector MacroReferences; + + std::unique_ptr record(AnalysisContext &Ctx); +}; + +struct RecordedAST { + std::vector TopLevelDecls; + + std::unique_ptr record(AnalysisContext &Ctx); +}; + +} // namespace include_cleaner +} // namespace clang + +#endif diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h @@ -0,0 +1,35 @@ +//===--- Policy.h - Tuning what is considered used ----------------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_INCLUDE_CLEANER_POLICY_H +#define CLANG_INCLUDE_CLEANER_POLICY_H + +namespace clang { +namespace include_cleaner { + +// Provides some fine-tuning of include-cleaner's choices about what is used. +// +// Changing the policy serves two purposes: +// - marking more things used reduces the false-positives for "unused include", +// while marking fewer things improves "missing include" in the same way. +// - different coding styles may make different decisions about which includes +// are required. +struct Policy { + // Does construction count as use of the type, when the type is not named? + // e.g. printVector({x, y, z}); - is std::vector used? + bool Construction = false; + // Is member access tracked as a reference? + bool Members = false; + // Are operator calls tracked as references? + bool Operators = false; +}; + +} // namespace include_cleaner +} // namespace clang + +#endif diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h @@ -0,0 +1,187 @@ +//===--- Types.h - Data structures for used-symbol analysis -------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_INCLUDE_CLEANER_TYPES_H +#define CLANG_INCLUDE_CLEANER_TYPES_H + +#include "clang/AST/DeclBase.h" +#include "clang/Tooling/Inclusions/StandardLibrary.h" +#include "llvm/ADT/PointerSumType.h" + +namespace clang { +class IdentifierInfo; +class MacroDirective; + +namespace include_cleaner { + +// Identifies a macro, along with the definition that was seen for it. +struct DefinedMacro { + const IdentifierInfo *Name; + const SourceLocation Definition; +}; + +// A Symbol is an entity that can be referenced. +// It is either a declaration (NamedDecl) or a macro (DefinedMacro). +class Symbol { +public: + enum Kind { + Macro, + Declaration, + }; + Symbol(NamedDecl *ND) : Target(ND) {} + Symbol(const DefinedMacro *M) : Target(M) {} + + std::string name() const; + std::string nodeName() const; + Kind kind() const { return Target.is() ? Declaration : Macro; } + + NamedDecl *getDeclaration() const { return Target.get(); } + const DefinedMacro *getMacro() const { + return Target.get(); + } + +private: + llvm::PointerUnion Target; +}; + +// A usage of a Symbol seen in our source code. +struct SymbolReference { + SourceLocation Location; + Symbol Target; +}; + +// A Location is a place where a symbol can be provided. +// It is either a physical part of the TU (SourceLocation) or a logical location +// in the standard library (stdlib::Symbol). 
+class Location {
+public:
+  enum Kind : uint8_t {
+    Physical,
+    StandardLibrary,
+  };
+
+  // Deliberately implicit: a Location is built directly from either payload.
+  Location(SourceLocation S) : K(Physical), SrcLoc(S) {}
+  Location(tooling::stdlib::Symbol S) : K(StandardLibrary), StdlibSym(S) {}
+
+  // Printable form: the source location, or the standard library symbol name.
+  std::string name(const SourceManager &SM) const;
+  Kind kind() const { return K; }
+
+  // Payload accessors; each asserts that the matching kind is active.
+  SourceLocation getPhysical() const {
+    assert(kind() == Physical);
+    return SrcLoc;
+  }
+  tooling::stdlib::Symbol getStandardLibrary() const {
+    assert(kind() == StandardLibrary);
+    return StdlibSym;
+  }
+
+private:
+  Kind K;
+  // Only the member selected by K is meaningful.
+  union {
+    SourceLocation SrcLoc;
+    tooling::stdlib::Symbol StdlibSym;
+  };
+};
+
+// A Header is an includable file that can provide access to Locations.
+// It is either a physical file (FileEntry), a logical location in the standard
+// library (stdlib::Header), a verbatim header spelling (StringRef), or one of
+// two payload-free pseudo-headers: the builtin buffer and the main file.
+class Header {
+public:
+  enum Kind : uint8_t {
+    Physical,
+    StandardLibrary,
+    Verbatim,
+    Builtin,
+    MainFile,
+  };
+
+  // Deliberately implicit: a Header is built directly from its payload.
+  // A verbatim spelling is stored as a pointer and must outlive the Header.
+  Header(const FileEntry *FE) : K(Physical), PhysicalFile(FE) {}
+  Header(tooling::stdlib::Header H) : K(StandardLibrary), StdlibHeader(H) {}
+  Header(const char *V) : K(Verbatim), VerbatimSpelling(V) {}
+  static Header builtin() { return Header{Builtin}; }
+  static Header mainFile() { return Header{MainFile}; }
+
+  std::string name() const;
+  Kind kind() const { return K; }
+
+  // Payload accessors; each asserts that the matching kind is active.
+  const FileEntry *getPhysical() const {
+    assert(kind() == Physical);
+    return PhysicalFile;
+  }
+  tooling::stdlib::Header getStandardLibrary() const {
+    assert(kind() == StandardLibrary);
+    return StdlibHeader;
+  }
+  llvm::StringRef getVerbatimSpelling() const {
+    assert(kind() == Verbatim);
+    return VerbatimSpelling;
+  }
+
+private:
+  // For the payload-free kinds. The union is nulled so that no member is left
+  // indeterminate.
+  Header(Kind K) : K(K), PhysicalFile(nullptr) {}
+
+  Kind K;
+  // Only the member selected by K is meaningful.
+  union {
+    const FileEntry *PhysicalFile;
+    tooling::stdlib::Header StdlibHeader;
+    const char *VerbatimSpelling;
+  };
+
+  // Headers are equal if they have the same kind and the same payload.
+  // Verbatim spellings are compared by content, not by pointer.
+  friend bool operator==(const Header &L, const Header &R) {
+    if (L.kind() != R.kind())
+      return false;
+    switch (L.kind()) {
+    case Physical:
+      return L.getPhysical() == R.getPhysical();
+    case StandardLibrary:
+      return L.getStandardLibrary() == R.getStandardLibrary();
+    case Verbatim:
+      return L.getVerbatimSpelling() == R.getVerbatimSpelling();
+    case Builtin:
+    case MainFile:
+      return true; // no payload
+    }
+    llvm_unreachable("unhandled Header kind");
+  }
+
+  // Hashes the kind together with the payload that operator== compares.
+  friend llvm::hash_code hash_value(const Header &H) {
+    switch (H.K) {
+    case Header::Physical:
+      return llvm::hash_combine(H.K, H.getPhysical());
+    case Header::StandardLibrary:
+      // FIXME: make StdlibHeader hashable instead.
+      return llvm::hash_combine(H.K, H.getStandardLibrary().name());
+    case Header::Verbatim:
+      return llvm::hash_combine(H.K, llvm::StringRef(H.VerbatimSpelling));
+    case Header::Builtin:
+    case Header::MainFile:
+      return llvm::hash_value(H.K);
+    }
+    // Every enumerator returns above; without this, control could reach the
+    // end of a non-void function.
+    llvm_unreachable("unhandled Header kind");
+  }
+};
+
+// Equality and hashing for DenseMap keys, in terms of operator== and
+// hash_value.
+template <typename T> struct DefaultDenseMapInfo {
+  static bool isEqual(const T &L, const T &R) { return L == R; }
+  static unsigned getHashValue(const T &V) { return hash_value(V); }
+};
+
+} // namespace include_cleaner
+} // namespace clang
+
+namespace llvm {
+// Allows Header to be used as a DenseMap/DenseSet key. The two sentinel keys
+// are reserved verbatim spellings.
+template <> struct DenseMapInfo<clang::include_cleaner::Header> {
+  using Header = clang::include_cleaner::Header;
+  static Header getTombstoneKey() { return Header("__tombstone__"); }
+  static Header getEmptyKey() { return Header("__empty__"); }
+  static bool isEqual(const Header &L, const Header &R) { return L == R; }
+  static unsigned getHashValue(const Header &V) { return hash_value(V); }
+};
+} // namespace llvm
+
+#endif
diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
@@ -0,0 +1,44 @@
+#include "clang-include-cleaner/Analysis.h"
+#include "AnalysisInternal.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace clang {
+namespace include_cleaner {
+
+AnalysisContext::AnalysisContext(const Policy &P, const Preprocessor &PP)
+    : P(P), SM(&PP.getSourceManager()), PP(&PP),
+      C(std::make_unique<Cache>()) {}
+AnalysisContext::~AnalysisContext() = default;
+
+// Reports every symbol referenced from ASTRoots and MacroRefs to Callback,
+// together with the headers that provide it.
+void walkUsed(AnalysisContext &Ctx, llvm::ArrayRef<Decl *> ASTRoots,
+              llvm::ArrayRef<SymbolReference> MacroRefs,
+              UsedSymbolVisitor Callback) {
+  for (Decl *Root : ASTRoots) {
+    walkAST(Ctx, *Root, [&](SourceLocation RefLoc, NamedDecl &ND) {
+      llvm::SmallVector<Header> Headers;
+      for (const Location &Loc : locateDecl(Ctx, ND)) {
+        // Redeclarations often share a file; list each header only once.
+        for (const Header &H : includableHeader(Ctx, Loc))
+          if (!llvm::is_contained(Headers, H))
+            Headers.push_back(H);
+      }
+      Callback(RefLoc, &ND, Headers);
+    });
+  }
+  for (const SymbolReference &MacroRef : MacroRefs) {
+    assert(MacroRef.Target.kind() == Symbol::Macro);
+    auto Loc = locateMacro(Ctx, *MacroRef.Target.getMacro());
+    auto Headers = includableHeader(Ctx, Loc);
+    Callback(MacroRef.Location, MacroRef.Target, Headers);
+  }
+}
+
+Symbol AnalysisContext::macro(const IdentifierInfo *II, SourceLocation Loc) {
+  return cache().macro(II, Loc);
+}
+
+} // namespace include_cleaner
+} // namespace clang
diff --git a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
@@ -0,0 +1,65 @@
+//===--- AnalysisInternal.h - Analysis building blocks ------------- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides smaller, testable pieces of the used-header analysis.
+// We find the headers by chaining together several mappings.
+//
+// AST => AST node => Symbol => Location => Header
+//                   /
+// Macro expansion =>
+//
+// The individual steps are declared here.
+// (AST => AST Node => Symbol is one API to avoid materializing DynTypedNodes).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H
+#define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H
+
+#include "clang-include-cleaner/Analysis.h"
+#include "clang-include-cleaner/Types.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include <deque>
+
+namespace clang {
+namespace include_cleaner {
+
+// State shared by the analysis steps: the macro definitions seen so far and
+// the standard library recognizer.
+class Cache {
+public:
+  // Returns the Symbol for the macro Name as defined at Def, creating the
+  // DefinedMacro on first use. The same (Name, Def) pair always yields the
+  // same DefinedMacro.
+  Symbol macro(const IdentifierInfo *Name, const SourceLocation Def) {
+    auto &Definitions = DefinedMacros[Name->getName()];
+    // Linear search. We probably only saw ~1 definition of each macro name.
+    for (const DefinedMacro &DM : Definitions)
+      if (DM.Definition == Def)
+        return &DM;
+    Definitions.push_back(DefinedMacro{Name, Def});
+    return &Definitions.back();
+  }
+
+  tooling::stdlib::Recognizer StdlibRecognizer;
+
+private:
+  // Symbols hold the address of a DefinedMacro stored here. std::deque keeps
+  // existing elements in place when push_back() grows it, so addresses handed
+  // out earlier stay valid when another definition of the name is added.
+  llvm::StringMap<std::deque<DefinedMacro>> DefinedMacros;
+};
+
+// Traverses a subtree of the AST, reporting declarations referenced.
+// The callback receives the location of the reference and the declaration.
+void walkAST(AnalysisContext &, Decl &Root,
+             llvm::function_ref<void(SourceLocation, NamedDecl &)>);
+
+// Finds the locations where a declaration is provided.
+llvm::SmallVector<Location> locateDecl(AnalysisContext &, const NamedDecl &);
+
+// Finds the locations where a macro is provided.
+Location locateMacro(AnalysisContext &, const DefinedMacro &);
+
+// Finds the headers that provide a location.
+llvm::SmallVector<Header> includableHeader(AnalysisContext &, const Location &);
+
+} // namespace include_cleaner
+} // namespace clang
+
+#endif
diff --git a/clang-tools-extra/include-cleaner/lib/CMakeLists.txt b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt
@@ -0,0 +1,16 @@
+set(LLVM_LINK_COMPONENTS Support)
+
+add_clang_library(clangIncludeCleaner
+  Analysis.cpp
+  Headers.cpp
+  Hooks.cpp
+  Locations.cpp
+  Types.cpp
+  WalkAST.cpp
+
+  LINK_LIBS
+  clangBasic
+  clangLex
+  clangAST
+  )
+
diff --git a/clang-tools-extra/include-cleaner/lib/Headers.cpp b/clang-tools-extra/include-cleaner/lib/Headers.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/Headers.cpp
@@ -0,0 +1,33 @@
+#include "AnalysisInternal.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace clang {
+namespace include_cleaner {
+
+// Maps a Location to the header providing it: the file containing a physical
+// location (or the main-file/builtin pseudo-header), or the header of a
+// standard library symbol. Yields nothing if the location has no FileEntry.
+llvm::SmallVector<Header> includableHeader(AnalysisContext &Ctx,
+                                           const Location &Loc) {
+  switch (Loc.kind()) {
+  case Location::Physical: {
+    const auto &SM = Ctx.sourceManager();
+    FileID FID = SM.getFileID(SM.getExpansionLoc(Loc.getPhysical()));
+    if (FID == SM.getMainFileID())
+      return {Header::mainFile()};
+    if (FID == Ctx.preprocessor().getPredefinesFileID())
+      return {Header::builtin()};
+    if (auto *FE = SM.getFileEntryForID(FID))
+      return {FE};
+    return {};
+  }
+  case Location::StandardLibrary:
+    return {Loc.getStandardLibrary().header()};
+  }
+  llvm_unreachable("Unhandled Location kind");
+}
+
+} // namespace include_cleaner
+} // namespace clang
diff --git a/clang-tools-extra/include-cleaner/lib/Hooks.cpp b/clang-tools-extra/include-cleaner/lib/Hooks.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/Hooks.cpp
@@ -0,0 +1,165 @@
+//===--- Hooks.cpp - Record events from the compiler --------------- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-include-cleaner/Hooks.h"
+#include "AnalysisInternal.h"
+#include "clang-include-cleaner/Analysis.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclGroup.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+
+namespace clang {
+namespace include_cleaner {
+
+// Preprocessor callbacks that record, into a RecordedPP, the #include
+// directives and macro references written in the main file.
+class PPRecorder : public PPCallbacks {
+public:
+  PPRecorder(AnalysisContext &Ctx, RecordedPP &Recorded)
+      : Ctx(Ctx), Recorded(Recorded) {}
+
+  // Events are only recorded while the preprocessor is in the main file.
+  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                   SrcMgr::CharacteristicKind FileType,
+                   FileID PrevFID) override {
+    Active = Ctx.sourceManager().isWrittenInMainFile(Loc);
+  }
+
+  // Appends the directive to Includes.All and indexes it both by its spelled
+  // name and by the file it resolved to.
+  void InclusionDirective(SourceLocation Hash, const Token &IncludeTok,
+                          StringRef SpelledFilename, bool IsAngled,
+                          CharSourceRange FilenameRange, const FileEntry *File,
+                          StringRef SearchPath, StringRef RelativePath,
+                          const Module *, SrcMgr::CharacteristicKind) override {
+    if (!Active)
+      return;
+
+    unsigned Index = Recorded.Includes.All.size();
+    Recorded.Includes.All.emplace_back();
+    RecordedPP::Include &I = Recorded.Includes.All.back();
+    I.Location = Hash;
+    I.Resolved = File;
+    I.Line = Ctx.sourceManager().getSpellingLineNumber(Hash);
+    auto BySpellingIt =
+        Recorded.Includes.BySpelling.try_emplace(SpelledFilename).first;
+    // Point at the map's own copy of the key rather than at the callback's
+    // argument.
+    I.Spelled = BySpellingIt->first();
+
+    BySpellingIt->second.push_back(Index);
+    Recorded.Includes.ByFile[File].push_back(Index);
+  }
+
+  void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
+                    SourceRange Range, const MacroArgs *Args) override {
+    if (!Active)
+      return;
+    // Guard against a definition without MacroInfo rather than dereferencing
+    // a null pointer.
+    if (const MacroInfo *MI = MD.getMacroInfo())
+      recordMacroRef(MacroName, *MI);
+  }
+
+  void MacroDefined(const Token &MacroName, const MacroDirective *MD) override {
+    if (!Active)
+      return;
+
+    const MacroInfo *Defined = MD->getMacroInfo();
+    // The tokens of a macro definition could refer to a macro.
+    // Formally this reference isn't resolved until this macro is expanded,
+    // but we want to treat it as a reference anyway.
+    for (const auto &Tok : Defined->tokens()) {
+      auto *II = Tok.getIdentifierInfo();
+      // Could this token be a reference to a macro? (Not param to this macro).
+      if (!II || !II->hadMacroDefinition() ||
+          llvm::is_contained(Defined->params(), II))
+        continue;
+      if (const MacroInfo *Referenced = Ctx.preprocessor().getMacroInfo(II))
+        recordMacroRef(Tok, *Referenced);
+    }
+  }
+
+private:
+  // Records a reference at Tok to the macro defined by MI.
+  void recordMacroRef(const Token &Tok, const MacroInfo &MI) {
+    if (MI.isBuiltinMacro())
+      return; // __FILE__ is not a reference.
+    Recorded.MacroReferences.push_back(SymbolReference{
+        Tok.getLocation(),
+        Ctx.cache().macro(Tok.getIdentifierInfo(), MI.getDefinitionLoc())});
+  }
+
+  bool Active = false;
+  AnalysisContext &Ctx;
+  RecordedPP &Recorded;
+};
+
+// Returns the recorded includes that correspond to H, without duplicates.
+// Physical headers match by resolved file, standard library and verbatim
+// headers by spelling; the builtin and main-file headers match nothing.
+llvm::SmallVector<const RecordedPP::Include *>
+RecordedPP::RecordedIncludes::match(Header H) const {
+  llvm::SmallVector<const RecordedPP::Include *> Result;
+  switch (H.kind()) {
+  case Header::Physical:
+    for (unsigned I : ByFile.lookup(H.getPhysical()))
+      Result.push_back(&All[I]);
+    break;
+  case Header::StandardLibrary:
+    for (unsigned I :
+         BySpelling.lookup(H.getStandardLibrary().name().trim("<>")))
+      Result.push_back(&All[I]);
+    break;
+  case Header::Verbatim:
+    for (unsigned I : BySpelling.lookup(H.getVerbatimSpelling()))
+      Result.push_back(&All[I]);
+    break;
+  case Header::Builtin:
+  case Header::MainFile:
+    break;
+  }
+  llvm::sort(Result);
+  Result.erase(std::unique(Result.begin(), Result.end()), Result.end());
+  return Result;
+}
+
+// AST consumer that records, into a RecordedAST, the top-level declarations
+// written in the main file.
+class ASTRecorder : public ASTConsumer {
+public:
+  ASTRecorder(AnalysisContext &Ctx, RecordedAST &Recorded)
+      : Ctx(Ctx), Recorded(Recorded) {}
+
+  bool HandleTopLevelDecl(DeclGroupRef DG) override {
+    for (Decl *D : DG) {
+      if (!Ctx.sourceManager().isWrittenInMainFile(
+              Ctx.sourceManager().getExpansionLoc(D->getLocation())))
+        continue;
+      // Implicit instantiations were not written by the user; skip them.
+      if (const auto *T = llvm::dyn_cast<FunctionDecl>(D))
+        if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
+          continue;
+      if (const auto *T = llvm::dyn_cast<CXXRecordDecl>(D))
+        if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
+          continue;
+      if (const auto *T = llvm::dyn_cast<VarDecl>(D))
+        if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
+          continue;
+      // ObjCMethodDecl are not actually top-level!
+      if (isa<ObjCMethodDecl>(D))
+        continue;
+
+      Recorded.TopLevelDecls.push_back(D);
+    }
+    return true;
+  }
+
+private:
+  AnalysisContext &Ctx;
+  RecordedAST &Recorded;
+};
+
+// The returned callbacks keep references to Ctx and to this RecordedPP.
+std::unique_ptr<PPCallbacks> RecordedPP::record(AnalysisContext &Ctx) {
+  return std::make_unique<PPRecorder>(Ctx, *this);
+}
+
+// The returned consumer keeps references to Ctx and to this RecordedAST.
+std::unique_ptr<ASTConsumer> RecordedAST::record(AnalysisContext &Ctx) {
+  return std::make_unique<ASTRecorder>(Ctx, *this);
+}
+
+} // namespace include_cleaner
+} // namespace clang
diff --git a/clang-tools-extra/include-cleaner/lib/Locations.cpp b/clang-tools-extra/include-cleaner/lib/Locations.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/Locations.cpp
@@ -0,0 +1,38 @@
+#include "AnalysisInternal.h"
+#include "clang-include-cleaner/Analysis.h"
+#include "clang-include-cleaner/Types.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace clang {
+namespace include_cleaner {
+
+// A declaration recognized as a standard library symbol has that symbol as its
+// only location; any other declaration is located at each redeclaration.
+llvm::SmallVector<Location> locateDecl(AnalysisContext &Ctx,
+                                       const NamedDecl &ND) {
+  if (auto StdlibSym = Ctx.cache().StdlibRecognizer(&ND))
+    return {*StdlibSym};
+
+  llvm::SmallVector<Location> Result;
+  for (const Decl *RD : ND.redecls()) {
+    // `friend X` is not an interesting location for X unless it's acting as a
+    // forward-declaration.
+    if (RD->getFriendObjectKind() == Decl::FOK_Declared)
+      continue;
+    SourceLocation Loc = RD->getLocation();
+    if (Loc.isValid())
+      Result.push_back(Loc);
+  }
+  return Result;
+}
+
+// A macro is provided at the location of its definition.
+Location locateMacro(AnalysisContext &Ctx, const DefinedMacro &M) {
+  return {M.Definition};
+}
+
+} // namespace include_cleaner
+} // namespace clang
diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/Types.cpp
@@ -0,0 +1,59 @@
+#include "clang-include-cleaner/Types.h"
+#include "clang/AST/Decl.h"
+#include "clang/Basic/FileEntry.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
+
+namespace clang {
+namespace include_cleaner {
+
+// The macro's identifier, or the declaration's name.
+std::string Symbol::name() const {
+  switch (kind()) {
+  case Macro:
+    return getMacro()->Name->getName().str();
+  case Declaration:
+    return getDeclaration()->getNameAsString();
+  }
+  llvm_unreachable("Unhandled Symbol kind");
+}
+
+// "macro" for macros, otherwise the declaration's kind name.
+std::string Symbol::nodeName() const {
+  if (kind() == Macro)
+    return "macro";
+  return getDeclaration()->getDeclKindName();
+}
+
+// The printed source location, or the standard library symbol's name.
+std::string Location::name(const SourceManager &SM) const {
+  switch (K) {
+  case Physical:
+    return SrcLoc.printToString(SM);
+  case StandardLibrary:
+    return StdlibSym.name().str();
+  }
+  llvm_unreachable("Unhandled Location kind");
+}
+
+// The file name, standard library header name, or verbatim spelling.
+std::string Header::name() const {
+  switch (K) {
+  case Physical:
+    return PhysicalFile->getName().str();
+  case StandardLibrary:
+    return StdlibHeader.name().str();
+  case Verbatim:
+    return VerbatimSpelling;
+  // NOTE(review): Builtin and MainFile both yield an empty name, so the two
+  // cannot be told apart in diagnostics; confirm that this is intended.
+  case Builtin:
+    return "";
+  case MainFile:
+    return "";
+  }
+  llvm_unreachable("Unhandled Header kind");
+}
+
+} // namespace include_cleaner
+} // namespace clang
diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
@@ -0,0 +1,124 @@
+#include "AnalysisInternal.h"
+#include "clang-include-cleaner/Analysis.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/SaveAndRestore.h"
+
+namespace clang {
+namespace include_cleaner {
+namespace {
+
+using DeclCallback = llvm::function_ref<void(SourceLocation, NamedDecl &)>;
+
+// Visits an AST subtree and reports each referenced declaration, with the
+// location of the reference, to the callback. The Policy gates some constructs.
+class ASTWalker : public RecursiveASTVisitor<ASTWalker> {
+public:
+  ASTWalker(AnalysisContext &Ctx, DeclCallback Callback)
+      : Ctx(Ctx), Callback(Callback) {}
+
+  bool VisitDeclRefExpr(DeclRefExpr *E) {
+    if (!Ctx.policy().Operators)
+      if (auto *FD = E->getDecl()->getAsFunction())
+        if (FD->isOverloadedOperator())
+          return true;
+    report(E->getLocation(), E->getFoundDecl());
+    return true;
+  }
+
+  bool VisitMemberExpr(MemberExpr *ME) {
+    if (Ctx.policy().Members)
+      report(ME->getMemberLoc(), ME->getFoundDecl().getDecl());
+    return true;
+  }
+
+  bool VisitTagType(TagType *TT) {
+    report(LocationOfType, TT->getDecl());
+    return true;
+  }
+
+  bool VisitFunctionDecl(FunctionDecl *FD) {
+    // Count function definitions as a reference to their declarations.
+    if (FD->isThisDeclarationADefinition() && FD->getCanonicalDecl() != FD)
+      report(FD->getLocation(), FD->getCanonicalDecl());
+    return true;
+  }
+
+  bool VisitCXXConstructExpr(CXXConstructExpr *E) {
+    if (!Ctx.policy().Construction)
+      return true;
+    // Types reached while traversing the constructed type are reported at the
+    // expression's location; the guard restores the previous value on return.
+    SaveAndRestore<SourceLocation> Loc(LocationOfType, E->getLocation());
+    return TraverseType(E->getType());
+  }
+
+  bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) {
+    report(LocationOfType,
+           TST->getTemplateName().getAsTemplateDecl()); // Primary template.
+    report(LocationOfType, TST->getAsCXXRecordDecl());  // Specialization
+    return true;
+  }
+
+  bool VisitUsingType(UsingType *UT) {
+    report(LocationOfType, UT->getFoundDecl());
+    return true;
+  }
+
+  bool VisitTypedefType(TypedefType *TT) {
+    report(LocationOfType, TT->getDecl());
+    return true;
+  }
+
+  bool VisitUsingDecl(UsingDecl *UD) {
+    for (const auto *USD : UD->shadows())
+      report(UD->getLocation(), USD->getTargetDecl());
+    return true;
+  }
+
+  bool VisitOverloadExpr(OverloadExpr *E) {
+    if (llvm::isa<UnresolvedMemberExpr>(E) && !Ctx.policy().Members)
+      return true;
+    for (auto *Candidate : E->decls())
+      report(E->getExprLoc(), Candidate);
+    return true;
+  }
+
+  bool TraverseTypeLoc(TypeLoc TL) {
+    SaveAndRestore<SourceLocation> Loc(LocationOfType, TL.getBeginLoc());
+    return Base::TraverseTypeLoc(TL);
+  }
+
+private:
+  // Passes ND (if non-null) to the callback as referenced at Loc. A location
+  // inside a macro argument is mapped to where the argument was spelled.
+  void report(SourceLocation Loc, NamedDecl *ND) {
+    while (Loc.isMacroID()) {
+      auto DecLoc = Ctx.sourceManager().getDecomposedLoc(Loc);
+      const SrcMgr::ExpansionInfo &Expansion =
+          Ctx.sourceManager().getSLocEntry(DecLoc.first).getExpansion();
+      if (!Expansion.isMacroArgExpansion())
+        return; // Names in macro bodies are not considered references.
+      Loc = Expansion.getSpellingLoc().getLocWithOffset(DecLoc.second);
+    }
+    if (ND)
+      Callback(Loc, *ND);
+  }
+
+  using Base = RecursiveASTVisitor<ASTWalker>;
+
+  AnalysisContext &Ctx;
+  DeclCallback Callback;
+
+  // Location to report for declarations found while visiting types.
+  SourceLocation LocationOfType;
+};
+
+} // namespace
+
+void walkAST(AnalysisContext &Ctx, Decl &Root, DeclCallback Callback) {
+  ASTWalker(Ctx, Callback).TraverseDecl(&Root);
+}
+
+} // namespace include_cleaner
+} // namespace clang
diff --git a/clang-tools-extra/include-cleaner/tool/CMakeLists.txt b/clang-tools-extra/include-cleaner/tool/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/tool/CMakeLists.txt
@@ -0,0 +1,19 @@
+set(LLVM_LINK_COMPONENTS support)
+
+add_clang_tool(clang-include-cleaner
+  ClangIncludeCleaner.cpp
+  )
+
+clang_target_link_libraries(clang-include-cleaner
+  PRIVATE
+  clangBasic
+  clangFrontend
+  clangTooling
+  )
+
+target_link_libraries(clang-include-cleaner
+  PRIVATE
+  clangIncludeCleaner
+  )
+
+
diff --git a/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp
@@ -0,0 +1,170 @@
+#include "clang-include-cleaner/Analysis.h"
+#include "clang-include-cleaner/Hooks.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InitLLVM.h"
+
+llvm::cl::OptionCategory OptionsCat{"clang-include-cleaner"};
+llvm::cl::opt<bool> ShowSatisfied{
+    "satisfied",
+    llvm::cl::cat(OptionsCat),
+    llvm::cl::desc(
+        "Show references whose header is included, and used includes"),
+    llvm::cl::init(false),
+};
+llvm::cl::opt<bool> Recover{
+    "recover",
+    llvm::cl::cat(OptionsCat),
+    llvm::cl::desc("Suppress further errors for the same header"),
+    llvm::cl::init(true),
+};
+
+namespace clang {
+namespace include_cleaner {
+namespace {
+
+// Frontend action that records includes, macro references and top-level
+// declarations for one file, then diagnoses missing and unused includes.
+class Action : public clang::ASTFrontendAction {
+public:
+  bool BeginSourceFileAction(CompilerInstance &CI) override {
+    Diags = &CI.getDiagnostics();
+    initDiagnostics();
+    Ctx.emplace(Policy{}, CI.getPreprocessor());
+    CI.getPreprocessor().addPPCallbacks(PP.record(*Ctx));
+    return true;
+  }
+
+  void EndSourceFile() override {
+    llvm::DenseSet<Header> Recovered;
+    llvm::DenseMap<const RecordedPP::Include *, Symbol> Used;
+    walkUsed(*Ctx, AST.TopLevelDecls, PP.MacroReferences,
+             [&](SourceLocation Loc, Symbol Sym, ArrayRef<Header> Headers) {
+               diagnoseReference(Loc, Sym, Headers, Recovered, Used);
+             });
+    diagnoseIncludes(PP.Includes.all(), Used);
+    Ctx.reset();
+
+    ASTFrontendAction::EndSourceFile();
+  }
+
+  std::unique_ptr<ASTConsumer>
+  CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
+    return AST.record(*Ctx);
+  }
+
+private:
+  void initDiagnostics() {
+    auto SatisfiedLevel =
+        ShowSatisfied ? DiagnosticsEngine::Remark : DiagnosticsEngine::Ignored;
+    DiagIDs.Satisfied =
+        Diags->getCustomDiagID(SatisfiedLevel, "%0 '%1' provided by %2");
+    DiagIDs.Unsatisfied = Diags->getCustomDiagID(
+        DiagnosticsEngine::Error, "no header included for %0 '%1'");
+    DiagIDs.NoHeader = Diags->getCustomDiagID(
+        DiagnosticsEngine::Warning, "unclear which header provides %0 '%1'");
+    DiagIDs.NoteHeader =
+        Diags->getCustomDiagID(DiagnosticsEngine::Note, "provided by %0");
+    DiagIDs.Used =
+        Diags->getCustomDiagID(SatisfiedLevel, "include provides %0 '%1'");
+    DiagIDs.Unused =
+        Diags->getCustomDiagID(DiagnosticsEngine::Error, "include is unused");
+  }
+
+  // Diagnoses one reference to Sym at Loc. Every include matching one of
+  // Headers is marked in Used. If nothing satisfies the reference, an error
+  // (or a warning, when Headers is empty) is reported with the candidates.
+  void
+  diagnoseReference(SourceLocation Loc, Symbol Sym, ArrayRef<Header> Headers,
+                    llvm::DenseSet<Header> &Recovered,
+                    llvm::DenseMap<const RecordedPP::Include *, Symbol> &Used) {
+    bool Diagnosed = false;
+    for (const auto &H : Headers) {
+      if (H.kind() == Header::Builtin || H.kind() == Header::MainFile) {
+        if (!Diagnosed) {
+          Diags->Report(Loc, DiagIDs.Satisfied)
+              << Sym.nodeName() << Sym.name() << H.name();
+          Diagnosed = true;
+        }
+      }
+      for (const auto *I : PP.Includes.match(H)) {
+        Used.try_emplace(I, Sym);
+        if (!Diagnosed) {
+          Diags->Report(Loc, DiagIDs.Satisfied)
+              << Sym.nodeName() << Sym.name() << I->Spelled;
+          Diagnosed = true;
+        }
+      }
+    }
+    if (Diagnosed)
+      return;
+    for (const auto &H : Headers) {
+      if (Recovered.contains(H)) {
+        Diags->Report(Loc, DiagIDs.Satisfied)
+            << Sym.nodeName() << Sym.name() << H.name();
+        return;
+      }
+    }
+    Diags->Report(Loc, Headers.empty() ? DiagIDs.NoHeader : DiagIDs.Unsatisfied)
+        << Sym.nodeName() << Sym.name();
+    for (const auto &H : Headers) {
+      // Honor -recover: with it off, every reference to H is diagnosed.
+      if (Recover)
+        Recovered.insert(H);
+      Diags->Report(DiagIDs.NoteHeader) << H.name();
+    }
+  }
+
+  // Reports each include as used (naming one symbol it provides) or unused.
+  void diagnoseIncludes(
+      ArrayRef<RecordedPP::Include> Includes,
+      const llvm::DenseMap<const RecordedPP::Include *, Symbol> &Used) {
+    for (const auto &I : Includes) {
+      auto It = Used.find(&I);
+      if (It == Used.end())
+        Diags->Report(I.Location, DiagIDs.Unused);
+      else
+        Diags->Report(I.Location, DiagIDs.Used)
+            << It->second.nodeName() << It->second.name();
+    }
+  }
+
+  llvm::Optional<AnalysisContext> Ctx;
+  RecordedPP PP;
+  RecordedAST AST;
+  DiagnosticsEngine *Diags = nullptr;
+  struct DiagIDs {
+    unsigned Satisfied;
+    unsigned Unsatisfied;
+    unsigned NoHeader;
+    unsigned NoteHeader;
+
+    unsigned Used;
+    unsigned Unused;
+  } DiagIDs;
+};
+
+} // namespace
+} // namespace include_cleaner
+} // namespace clang
+
+int main(int Argc, const char **Argv) {
+  llvm::InitLLVM X(Argc, Argv);
+  auto OptionsParser =
+      clang::tooling::CommonOptionsParser::create(Argc, Argv, OptionsCat);
+  if (!OptionsParser) {
+    llvm::errs() << toString(OptionsParser.takeError());
+    return 1;
+  }
+
+  // Create the tool and run the compilation.
+  return clang::tooling::ClangTool(OptionsParser->getCompilations(),
+                                   OptionsParser->getSourcePathList())
+      .run(clang::tooling::newFrontendActionFactory<
+               clang::include_cleaner::Action>()
+               .get());
+}