diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h --- a/clang-tools-extra/clangd/IncludeCleaner.h +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -21,14 +21,9 @@ #include "Headers.h" #include "ParsedAST.h" #include "clang-include-cleaner/Types.h" -#include "index/CanonicalIncludes.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Tooling/Inclusions/StandardLibrary.h" #include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringSet.h" -#include #include #include @@ -52,63 +47,6 @@ std::vector MissingIncludes; }; -struct ReferencedLocations { - llvm::DenseSet User; - llvm::DenseSet Stdlib; -}; - -/// Finds locations of all symbols used in the main file. -/// -/// - RecursiveASTVisitor finds references to symbols and records their -/// associated locations. These may be macro expansions, and are not resolved -/// to their spelling or expansion location. These locations are later used to -/// determine which headers should be marked as "used" and "directly used". -/// - If \p Tokens is not nullptr, we also examine all identifier tokens in the -/// file in case they reference macros macros. -/// We use this to compute unused headers, so we: -/// -/// - cover the whole file in a single traversal for efficiency -/// - don't attempt to describe where symbols were referenced from in -/// ambiguous cases (e.g. implicitly used symbols, multiple declarations) -/// - err on the side of reporting all possible locations -ReferencedLocations findReferencedLocations(ASTContext &Ctx, Preprocessor &PP, - const syntax::TokenBuffer *Tokens); -ReferencedLocations findReferencedLocations(ParsedAST &AST); - -struct ReferencedFiles { - llvm::DenseSet User; - llvm::DenseSet Stdlib; - /// Files responsible for the symbols referenced in the main file and defined - /// in private headers (private headers have IWYU pragma: private, include - /// "public.h"). We store spelling of the public header (with quotes or angle - /// brackets) files here to avoid dealing with full filenames and visibility. - llvm::StringSet<> SpelledUmbrellas; -}; - -/// Retrieves IDs of all files containing SourceLocations from \p Locs. -/// The output only includes things SourceManager sees as files (not macro IDs). -/// This can include , etc that are not true files. -/// \p HeaderResponsible returns the public header that should be included given -/// symbols from a file with the given FileID (example: public headers should be -/// preferred to non self-contained and private headers). -/// \p UmbrellaHeader returns the public public header is responsible for -/// providing symbols from a file with the given FileID (example: MyType.h -/// should be included instead of MyType_impl.h). -ReferencedFiles findReferencedFiles( - const ReferencedLocations &Locs, const SourceManager &SM, - llvm::function_ref HeaderResponsible, - llvm::function_ref(FileID)> UmbrellaHeader); -ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs, - const IncludeStructure &Includes, - const CanonicalIncludes &CanonIncludes, - const SourceManager &SM); - -/// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs). -/// FileIDs that are not true files ( etc) are dropped. -llvm::DenseSet -translateToHeaderIDs(const ReferencedFiles &Files, - const IncludeStructure &Includes, const SourceManager &SM); - /// Retrieves headers that are referenced from the main file but not used. /// In unclear cases, headers are not marked as unused. std::vector @@ -117,7 +55,6 @@ const llvm::StringSet<> &ReferencedPublicHeaders); IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST); -std::vector computeUnusedIncludes(ParsedAST &AST); std::vector issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code); diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -16,7 +16,6 @@ #include "URI.h" #include "clang-include-cleaner/Analysis.h" #include "clang-include-cleaner/Types.h" -#include "index/CanonicalIncludes.h" #include "support/Logger.h" #include "support/Path.h" #include "support/Trace.h" @@ -24,7 +23,6 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" -#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/TemplateName.h" #include "clang/AST/Type.h" #include "clang/Basic/Diagnostic.h" @@ -36,7 +34,6 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Inclusions/HeaderIncludes.h" -#include "clang/Tooling/Inclusions/IncludeStyle.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" #include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/ArrayRef.h" @@ -66,181 +63,6 @@ namespace { -/// Crawler traverses the AST and feeds in the locations of (sometimes -/// implicitly) used symbols into \p Result. -class ReferencedLocationCrawler - : public RecursiveASTVisitor { -public: - ReferencedLocationCrawler(ReferencedLocations &Result, - const SourceManager &SM) - : Result(Result), SM(SM) {} - - bool VisitDeclRefExpr(DeclRefExpr *DRE) { - add(DRE->getDecl()); - add(DRE->getFoundDecl()); - return true; - } - - bool VisitMemberExpr(MemberExpr *ME) { - add(ME->getMemberDecl()); - add(ME->getFoundDecl().getDecl()); - return true; - } - - bool VisitTagType(TagType *TT) { - add(TT->getDecl()); - return true; - } - - bool VisitFunctionDecl(FunctionDecl *FD) { - // Function definition will require redeclarations to be included. - if (FD->isThisDeclarationADefinition()) - add(FD); - return true; - } - - bool VisitCXXConstructExpr(CXXConstructExpr *CCE) { - add(CCE->getConstructor()); - return true; - } - - bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { - // Using templateName case is handled by the override TraverseTemplateName. - if (TST->getTemplateName().getKind() == TemplateName::UsingTemplate) - return true; - add(TST->getAsCXXRecordDecl()); // Specialization - return true; - } - - // There is no VisitTemplateName in RAV, thus we override the Traverse version - // to handle the Using TemplateName case. - bool TraverseTemplateName(TemplateName TN) { - VisitTemplateName(TN); - return Base::TraverseTemplateName(TN); - } - // A pseudo VisitTemplateName, dispatched by the above TraverseTemplateName! - bool VisitTemplateName(TemplateName TN) { - if (const auto *USD = TN.getAsUsingShadowDecl()) { - add(USD); - return true; - } - add(TN.getAsTemplateDecl()); // Primary template. - return true; - } - - bool VisitUsingType(UsingType *UT) { - add(UT->getFoundDecl()); - return true; - } - - bool VisitTypedefType(TypedefType *TT) { - add(TT->getDecl()); - return true; - } - - // Consider types of any subexpression used, even if the type is not named. - // This is helpful in getFoo().bar(), where Foo must be complete. - // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to - // consider types "used" when they are not directly spelled in code. - bool VisitExpr(Expr *E) { - TraverseType(E->getType()); - return true; - } - - bool TraverseType(QualType T) { - if (isNew(T.getTypePtrOrNull())) // don't care about quals - Base::TraverseType(T); - return true; - } - - bool VisitUsingDecl(UsingDecl *D) { - for (const auto *Shadow : D->shadows()) - add(Shadow->getTargetDecl()); - return true; - } - - // Enums may be usefully forward-declared as *complete* types by specifying - // an underlying type. In this case, the definition should see the declaration - // so they can be checked for compatibility. - bool VisitEnumDecl(EnumDecl *D) { - if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo()) - add(D); - return true; - } - - // When the overload is not resolved yet, mark all candidates as used. - bool VisitOverloadExpr(OverloadExpr *E) { - for (const auto *ResolutionDecl : E->decls()) - add(ResolutionDecl); - return true; - } - -private: - using Base = RecursiveASTVisitor; - - void add(const Decl *D) { - if (!D || !isNew(D->getCanonicalDecl())) - return; - if (auto SS = StdRecognizer(D)) { - Result.Stdlib.insert(*SS); - return; - } - // Special case RecordDecls, as it is common for them to be forward - // declared multiple times. The most common cases are: - // - Definition available in TU, only mark that one as usage. The rest is - // likely to be unnecessary. This might result in false positives when an - // internal definition is visible. - // - There's a forward declaration in the main file, no need for other - // redecls. - if (const auto *RD = llvm::dyn_cast(D)) { - if (const auto *Definition = RD->getDefinition()) { - Result.User.insert(Definition->getLocation()); - return; - } - if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation())) - return; - } - for (const Decl *Redecl : D->redecls()) - Result.User.insert(Redecl->getLocation()); - } - - bool isNew(const void *P) { return P && Visited.insert(P).second; } - - ReferencedLocations &Result; - llvm::DenseSet Visited; - const SourceManager &SM; - tooling::stdlib::Recognizer StdRecognizer; -}; - -// Given a set of referenced FileIDs, determines all the potentially-referenced -// files and macros by traversing expansion/spelling locations of macro IDs. -// This is used to map the referenced SourceLocations onto real files. -struct ReferencedFilesBuilder { - ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {} - llvm::DenseSet Files; - llvm::DenseSet Macros; - const SourceManager &SM; - - void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); } - - void add(FileID FID, SourceLocation Loc) { - if (FID.isInvalid()) - return; - assert(SM.isInFileID(Loc, FID)); - if (Loc.isFileID()) { - Files.insert(FID); - return; - } - // Don't process the same macro FID twice. - if (!Macros.insert(FID).second) - return; - const auto &Exp = SM.getSLocEntry(FID).getExpansion(); - add(Exp.getSpellingLoc()); - add(Exp.getExpansionLocStart()); - add(Exp.getExpansionLocEnd()); - } -}; - // Returns the range starting at '#' and ending at EOL. Escaped newlines are not // handled. clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) { @@ -255,31 +77,6 @@ return Result; } -// Finds locations of macros referenced from within the main file. That includes -// references that were not yet expanded, e.g `BAR` in `#define FOO BAR`. -void findReferencedMacros(const SourceManager &SM, Preprocessor &PP, - const syntax::TokenBuffer *Tokens, - ReferencedLocations &Result) { - trace::Span Tracer("IncludeCleaner::findReferencedMacros"); - // FIXME(kirillbobyrev): The macros from the main file are collected in - // ParsedAST's MainFileMacros. However, we can't use it here because it - // doesn't handle macro references that were not expanded, e.g. in macro - // definitions or preprocessor-disabled sections. - // - // Extending MainFileMacros to collect missing references and switching to - // this mechanism (as opposed to iterating through all tokens) will improve - // the performance of findReferencedMacros and also improve other features - // relying on MainFileMacros. - for (const syntax::Token &Tok : Tokens->spelledTokens(SM.getMainFileID())) { - auto Macro = locateMacroAt(Tok, PP); - if (!Macro) - continue; - auto Loc = Macro->Info->getDefinitionLoc(); - if (Loc.isValid()) - Result.User.insert(Loc); - // FIXME: support stdlib macros - } -} bool isFilteredByConfig(const Config &Cfg, llvm::StringRef HeaderPath) { // Convert the path to Unix slashes and try to match against the filter. @@ -330,31 +127,6 @@ return true; } -// In case symbols are coming from non self-contained header, we need to find -// its first includer that is self-contained. This is the header users can -// include, so it will be responsible for bringing the symbols from given -// header into the scope. -FileID headerResponsible(FileID ID, const SourceManager &SM, - const IncludeStructure &Includes) { - // Unroll the chain of non self-contained headers until we find the one that - // can be included. - for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID(); - FE = SM.getFileEntryForID(ID)) { - // If FE is nullptr, we consider it to be the responsible header. - if (!FE) - break; - auto HID = Includes.getID(FE); - assert(HID && "We're iterating over headers already existing in " - "IncludeStructure"); - if (Includes.isSelfContained(*HID)) - break; - // The header is not self-contained: put the responsibility for its symbols - // on its includer. - ID = SM.getFileID(SM.getIncludeLoc(ID)); - } - return ID; -} - include_cleaner::Includes convertIncludes(const SourceManager &SM, const llvm::ArrayRef MainFileIncludes) { @@ -546,81 +318,6 @@ } } // namespace -ReferencedLocations findReferencedLocations(ASTContext &Ctx, Preprocessor &PP, - const syntax::TokenBuffer *Tokens) { - trace::Span Tracer("IncludeCleaner::findReferencedLocations"); - ReferencedLocations Result; - const auto &SM = Ctx.getSourceManager(); - ReferencedLocationCrawler Crawler(Result, SM); - Crawler.TraverseAST(Ctx); - if (Tokens) - findReferencedMacros(SM, PP, Tokens, Result); - return Result; -} - -ReferencedLocations findReferencedLocations(ParsedAST &AST) { - return findReferencedLocations(AST.getASTContext(), AST.getPreprocessor(), - &AST.getTokens()); -} - -ReferencedFiles findReferencedFiles( - const ReferencedLocations &Locs, const SourceManager &SM, - llvm::function_ref HeaderResponsible, - llvm::function_ref(FileID)> UmbrellaHeader) { - std::vector Sorted{Locs.User.begin(), Locs.User.end()}; - llvm::sort(Sorted); // Group by FileID. - ReferencedFilesBuilder Builder(SM); - for (auto It = Sorted.begin(); It < Sorted.end();) { - FileID FID = SM.getFileID(*It); - Builder.add(FID, *It); - // Cheaply skip over all the other locations from the same FileID. - // This avoids lots of redundant Loc->File lookups for the same file. - do - ++It; - while (It != Sorted.end() && SM.isInFileID(*It, FID)); - } - - // If a header is not self-contained, we consider its symbols a logical part - // of the including file. Therefore, mark the parents of all used - // non-self-contained FileIDs as used. Perform this on FileIDs rather than - // HeaderIDs, as each inclusion of a non-self-contained file is distinct. - llvm::DenseSet UserFiles; - llvm::StringSet<> PublicHeaders; - for (FileID ID : Builder.Files) { - UserFiles.insert(HeaderResponsible(ID)); - if (auto PublicHeader = UmbrellaHeader(ID)) { - PublicHeaders.insert(*PublicHeader); - } - } - - llvm::DenseSet StdlibFiles; - for (const auto &Symbol : Locs.Stdlib) - for (const auto &Header : Symbol.headers()) - StdlibFiles.insert(Header); - - return {std::move(UserFiles), std::move(StdlibFiles), - std::move(PublicHeaders)}; -} - -ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs, - const IncludeStructure &Includes, - const CanonicalIncludes &CanonIncludes, - const SourceManager &SM) { - return findReferencedFiles( - Locs, SM, - [&SM, &Includes](FileID ID) { - return headerResponsible(ID, SM, Includes); - }, - [&SM, &CanonIncludes](FileID ID) -> std::optional { - auto Entry = SM.getFileEntryRefForID(ID); - if (!Entry) - return std::nullopt; - auto PublicHeader = CanonIncludes.mapHeader(*Entry); - if (PublicHeader.empty()) - return std::nullopt; - return PublicHeader; - }); -} std::vector getUnused(ParsedAST &AST, @@ -648,52 +345,6 @@ return Unused; } -#ifndef NDEBUG -// Is FID a , etc? -static bool isSpecialBuffer(FileID FID, const SourceManager &SM) { - const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile(); - return FI.getName().startswith("<"); -} -#endif - -llvm::DenseSet -translateToHeaderIDs(const ReferencedFiles &Files, - const IncludeStructure &Includes, - const SourceManager &SM) { - trace::Span Tracer("IncludeCleaner::translateToHeaderIDs"); - llvm::DenseSet TranslatedHeaderIDs; - TranslatedHeaderIDs.reserve(Files.User.size()); - for (FileID FID : Files.User) { - const FileEntry *FE = SM.getFileEntryForID(FID); - if (!FE) { - assert(isSpecialBuffer(FID, SM)); - continue; - } - const auto File = Includes.getID(FE); - assert(File); - TranslatedHeaderIDs.insert(*File); - } - for (tooling::stdlib::Header StdlibUsed : Files.Stdlib) - for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed)) - TranslatedHeaderIDs.insert(HID); - return TranslatedHeaderIDs; -} - -// This is the original clangd-own implementation for computing unused -// #includes. Eventually it will be deprecated and replaced by the -// include-cleaner-lib-based implementation. -std::vector computeUnusedIncludes(ParsedAST &AST) { - const auto &SM = AST.getSourceManager(); - - auto Refs = findReferencedLocations(AST); - auto ReferencedFiles = - findReferencedFiles(Refs, AST.getIncludeStructure(), - AST.getCanonicalIncludes(), AST.getSourceManager()); - auto ReferencedHeaders = - translateToHeaderIDs(ReferencedFiles, AST.getIncludeStructure(), SM); - return getUnused(AST, ReferencedHeaders, ReferencedFiles.SpelledUmbrellas); -} - IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) { const auto &SM = AST.getSourceManager(); const auto &Includes = AST.getIncludeStructure(); diff --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp --- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp +++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp @@ -38,7 +38,6 @@ using ::testing::AllOf; using ::testing::ElementsAre; -using ::testing::ElementsAreArray; using ::testing::IsEmpty; using ::testing::Matcher; using ::testing::Pointee; @@ -64,286 +63,6 @@ return "#pragma once\n" + Code.str(); } -TEST(IncludeCleaner, ReferencedLocations) { - struct TestCase { - std::string HeaderCode; - std::string MainCode; - }; - TestCase Cases[] = { - // DeclRefExpr - { - "int ^x();", - "int y = x();", - }, - // RecordDecl - { - "class ^X;", - "X *y;", - }, - // When definition is available, we don't need to mark forward - // declarations as used. - { - "class ^X {}; class X;", - "X *y;", - }, - // We already have forward declaration in the main file, the other - // non-definition declarations are not needed. - { - "class ^X {}; class X;", - "class X; X *y;", - }, - // Nested class definition can occur outside of the parent class - // definition. Bar declaration should be visible to its definition but - // it will always be because we will mark Foo definition as used. - { - "class ^Foo { class Bar; };", - "class Foo::Bar {};", - }, - // TypedefType and UsingDecls - { - "using ^Integer = int;", - "Integer x;", - }, - { - "namespace ns { void ^foo(); void ^foo() {} }", - "using ns::foo;", - }, - { - "namespace ns { void foo(); void foo() {}; }", - "using namespace ns;", - }, - { - // Refs from UsingTypeLoc and implicit constructor! - "struct ^A {}; using B = A; using ^C = B;", - "C a;", - }, - {"namespace ns { template class A {}; } using ns::^A;", - "A* a;"}, - {"namespace ns { template class A {}; } using ns::^A;", - R"cpp( - template