diff --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h --- a/clang-tools-extra/clangd/Headers.h +++ b/clang-tools-extra/clangd/Headers.h @@ -58,6 +58,7 @@ unsigned HashOffset = 0; // Byte offset from start of file to #. int HashLine = 0; // Line number containing the directive, 0-indexed. SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User; + FileID ID; }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion &); bool operator==(const Inclusion &LHS, const Inclusion &RHS); @@ -112,6 +113,15 @@ // in any non-preamble inclusions. class IncludeStructure { public: + using File = unsigned; + + llvm::Optional getFile(const FileEntry *Entry) const { + auto It = NameToIndex.find(Entry->getName()); + if (It == NameToIndex.end()) + return llvm::None; + return It->second; + } + std::vector MainFileIncludes; // Return all transitively reachable files. @@ -129,6 +139,23 @@ llvm::StringRef IncludedName, llvm::StringRef IncludedRealName); + // Classifying the main-file includes as "used" or "unused" is subtle + // (consider transitive includes), so we inject the algorithm. + + ArrayRef getIncludedFiles(File); + // Maps including files (from) to included files (to). + using AbstractIncludeGraph = llvm::DenseMap>; + + llvm::ArrayRef getChildren(const FileEntry *Entry) const { + auto FoundFile = getFile(Entry); + if (!FoundFile) + return llvm::None; + auto It = IncludeChildren.find(*FoundFile); + if (It == IncludeChildren.end()) + return llvm::None; + return It->second; + } + private: // Identifying files in a way that persists from preamble build to subsequent // builds is surprisingly hard. FileID is unavailable in InclusionDirective(), @@ -136,10 +163,10 @@ // We use the FileEntry::Name, which is stable, interned into a "file index". // The paths we want to expose are the RealPathName, so store those too. std::vector RealPathNames; // In file index order. - unsigned fileIndex(llvm::StringRef Name); - llvm::StringMap NameToIndex; // Values are file indexes. + File fileIndex(llvm::StringRef Name); + llvm::StringMap NameToIndex; // Values are file indexes. // Maps a file's index to that of the files it includes. - llvm::DenseMap> IncludeChildren; + AbstractIncludeGraph IncludeChildren; }; /// Returns a PPCallback that visits all inclusions in the main file. diff --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp --- a/clang-tools-extra/clangd/Headers.cpp +++ b/clang-tools-extra/clangd/Headers.cpp @@ -56,6 +56,8 @@ SM.getLineNumber(SM.getFileID(HashLoc), Inc.HashOffset) - 1; Inc.FileKind = FileKind; Inc.Directive = IncludeTok.getIdentifierInfo()->getPPKeywordID(); + if (File) + Inc.ID = SM.translateFile(File); } // Record include graph (not just for main-file includes) @@ -164,7 +166,7 @@ IncludeChildren[Parent].push_back(Child); } -unsigned IncludeStructure::fileIndex(llvm::StringRef Name) { +IncludeStructure::File IncludeStructure::fileIndex(llvm::StringRef Name) { auto R = NameToIndex.try_emplace(Name, RealPathNames.size()); if (R.second) RealPathNames.emplace_back(); @@ -177,7 +179,7 @@ llvm::StringMap Result; Result[Root] = 0; std::vector CurrentLevel; - llvm::DenseSet Seen; + llvm::DenseSet Seen; auto It = NameToIndex.find(Root); if (It != NameToIndex.end()) { CurrentLevel.push_back(It->second); diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h --- a/clang-tools-extra/clangd/IncludeCleaner.h +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -25,6 +25,7 @@ #include "ParsedAST.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseSet.h" +#include namespace clang { namespace clangd { @@ -46,6 +47,29 @@ /// - err on the side of reporting all possible locations ReferencedLocations findReferencedLocations(ParsedAST &AST); +/// Retrieves IDs of all files containing SourceLocations from \p Locs. +/// FIXME: Those locations could be within macro expansions and are resolved to +/// their spelling/expansion locations. +llvm::DenseSet findReferencedFiles(const ReferencedLocations &Locs, + const SourceManager &SM); + +inline llvm::DenseMap directlyReferencedFiles( + const IncludeStructure::AbstractIncludeGraph &Graph, + const llvm::DenseSet &Referenced, + unsigned EntryPoint) { + llvm::DenseMap Result; + for (IncludeStructure::File Inclusion : Graph.lookup(EntryPoint)) + Result.try_emplace(Inclusion, Referenced.contains(Inclusion)); + return Result; +} + +/// Retrieves headers that are referenced from the main file (\p EntryPoint) +/// but not used. +std::vector +getUnused(IncludeStructure::File EntryPoint, const IncludeStructure &Structure, + const llvm::DenseSet &ReferencedFiles, + const SourceManager &SM); + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -98,6 +98,35 @@ llvm::DenseSet Visited; }; +// Given a set of referenced FileIDs, determines all the potentially-referenced +// files and macros by traversing expansion/spelling locations of macro IDs. +// This is used to map the referenced SourceLocations onto real files. +struct ReferencedFiles { + ReferencedFiles(const SourceManager &SM) : SM(SM) {} + llvm::DenseSet Files; + llvm::DenseSet Macros; + const SourceManager &SM; + + void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); } + + void add(FileID FID, SourceLocation Loc) { + if (FID.isInvalid()) + return; + assert(SM.isInFileID(Loc, FID)); + if (Loc.isFileID()) { + Files.insert(FID); + return; + } + // Don't process the same macro FID twice. + if (!Macros.insert(FID).second) + return; + const auto &Exp = SM.getSLocEntry(FID).getExpansion(); + add(Exp.getSpellingLoc()); + add(Exp.getExpansionLocStart()); + add(Exp.getExpansionLocEnd()); + } +}; + } // namespace ReferencedLocations findReferencedLocations(ParsedAST &AST) { @@ -108,5 +137,45 @@ return Result; } +llvm::DenseSet +findReferencedFiles(const llvm::DenseSet &Locs, + const SourceManager &SM) { + std::vector Sorted{Locs.begin(), Locs.end()}; + llvm::sort(Sorted); // Group by FileID. + ReferencedFiles Result(SM); + for (auto It = Sorted.begin(); It < Sorted.end();) { + FileID FID = SM.getFileID(*It); + Result.add(FID, *It); + // Cheaply skip over all the other locations from the same FileID. + // This avoids lots of redundant Loc->File lookups for the same file. + do + ++It; + while (It != Sorted.end() && SM.isInFileID(*It, FID)); + } + return std::move(Result.Files); +} + +std::vector +getUnused(IncludeStructure::File EntryPoint, const IncludeStructure &Structure, + const llvm::DenseSet &ReferencedFiles, + const SourceManager &SM) { + std::vector Unused; + for (auto &MFI : Structure.MainFileIncludes) { + // FIXME: Skip includes that are not self-contained. + auto It = Structure.getFile(SM.getFileEntryForID(MFI.ID)); + if (!It) { + elog("Missing IncludeStructure::File for {0}", + SM.getComposedLoc(MFI.ID, 0).printToString(SM)); + continue; + } + bool Used = ReferencedFiles.find(*It) != ReferencedFiles.end(); + if (!Used) { + Unused.push_back(MFI); + } + dlog("{0} is {1}", MFI.Written, Used ? "USED" : "UNUSED"); + } + return Unused; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/ParsedAST.h b/clang-tools-extra/clangd/ParsedAST.h --- a/clang-tools-extra/clangd/ParsedAST.h +++ b/clang-tools-extra/clangd/ParsedAST.h @@ -116,6 +116,8 @@ return Resolver.get(); } + std::vector computeUnusedIncludes(); + private: ParsedAST(llvm::StringRef Version, std::shared_ptr Preamble, diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -18,6 +18,7 @@ #include "FeatureModule.h" #include "Headers.h" #include "HeuristicResolver.h" +#include "IncludeCleaner.h" #include "IncludeFixer.h" #include "Preamble.h" #include "SourceCode.h" @@ -614,5 +615,35 @@ return llvm::None; return llvm::StringRef(Preamble->Version); } + +std::vector ParsedAST::computeUnusedIncludes() { + const auto &SM = getSourceManager(); + + auto Refs = findReferencedLocations(*this); + auto ReferencedFileIDs = findReferencedFiles(Refs, SM); + llvm::DenseSet ReferencedFiles; + ReferencedFiles.reserve(ReferencedFileIDs.size()); + for (FileID FID : ReferencedFileIDs) { + const FileEntry *FE = SM.getFileEntryForID(FID); + if (!FE) { + elog("Missing FE for {0}", SM.getComposedLoc(FID, 0).printToString(SM)); + continue; + } + const auto File = Includes.getFile(FE); + if (!File) { + elog("Missing FE for {0}", SM.getComposedLoc(FID, 0).printToString(SM)); + continue; + } + ReferencedFiles.insert(*File); + } + auto MainFileIndex = + Includes.getFile(SM.getFileEntryForID(SM.getMainFileID())); + if (!MainFileIndex) { + elog("Missing MainFile in the IncludeStructure"); + return {}; + } + return getUnused(*MainFileIndex, Includes, ReferencedFiles, SM); +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp --- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp +++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp @@ -131,6 +131,35 @@ } } +TEST(IncludeCleaner, GetUnusedHeaders) { + llvm::StringLiteral MainFile = R"cpp( + #include "a.h" + #include "b.h" + #include "dir/c.h" + #include "unused.h" + void foo() { + a(); + b(); + c(); + })cpp"; + // Build expected ast with symbols coming from headers. + TestTU TU; + TU.Filename = "foo.cpp"; + TU.AdditionalFiles["foo.h"] = "void foo();"; + TU.AdditionalFiles["a.h"] = "void a();"; + TU.AdditionalFiles["b.h"] = "void b();"; + TU.AdditionalFiles["dir/c.h"] = "void c();"; + TU.AdditionalFiles["unused.h"] = "void unused();"; + TU.ExtraArgs = {"-I" + testPath("dir")}; + TU.Code = MainFile.str(); + ParsedAST AST = TU.build(); + auto UnusedIncludes = AST.computeUnusedIncludes(); + std::vector UnusedHeaders; + for (const auto &Include : UnusedIncludes) { + UnusedHeaders.push_back(Include.Written); + } +} + } // namespace } // namespace clangd } // namespace clang