diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h --- a/clang-tools-extra/clangd/IncludeCleaner.h +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -52,6 +52,7 @@ /// The output only includes things SourceManager sees as files (not macro IDs). /// This can include , etc that are not true files. llvm::DenseSet findReferencedFiles(const ReferencedLocations &Locs, + const IncludeStructure &Includes, const SourceManager &SM); /// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs). diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -8,6 +8,7 @@ #include "IncludeCleaner.h" #include "Config.h" +#include "Headers.h" #include "ParsedAST.h" #include "Protocol.h" #include "SourceCode.h" @@ -16,6 +17,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/Syntax/Tokens.h" @@ -221,6 +223,31 @@ return true; } +// In case symbols are coming from non self-contained header, we need to find +// its first includer that is self-contained. This is the header users can +// include, so it will be responsible for bringing the symbols from given +// header into the scope. +FileID headerResponsible(FileID ID, const SourceManager &SM, + const IncludeStructure &Includes) { + // Unroll the chain of non self-contained headers until we find the one that + // can be included. + for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID(); + FE = SM.getFileEntryForID(ID)) { + // If FE is nullptr, we consider it to be the responsible header. + if (!FE) + break; + auto HID = Includes.getID(FE); + assert(HID && "We're iterating over headers already existing in " + "IncludeStructure"); + if (Includes.isSelfContained(*HID)) + break; + // The header is not self-contained: put the responsibility for its symbols + // on its includer. + ID = SM.getFileID(SM.getIncludeLoc(ID)); + } + return ID; +} + } // namespace ReferencedLocations findReferencedLocations(ParsedAST &AST) { @@ -234,20 +261,27 @@ llvm::DenseSet findReferencedFiles(const llvm::DenseSet &Locs, - const SourceManager &SM) { + const IncludeStructure &Includes, const SourceManager &SM) { std::vector Sorted{Locs.begin(), Locs.end()}; llvm::sort(Sorted); // Group by FileID. - ReferencedFiles Result(SM); + ReferencedFiles Files(SM); for (auto It = Sorted.begin(); It < Sorted.end();) { FileID FID = SM.getFileID(*It); - Result.add(FID, *It); + Files.add(FID, *It); // Cheaply skip over all the other locations from the same FileID. // This avoids lots of redundant Loc->File lookups for the same file. do ++It; while (It != Sorted.end() && SM.isInFileID(*It, FID)); } - return std::move(Result.Files); + // If a header is not self-contained, we consider its symbols a logical part + // of the including file. Therefore, mark the parents of all used + // non-self-contained FileIDs as used. Perform this on FileIDs rather than + // HeaderIDs, as each inclusion of a non-self-contained file is distinct. + llvm::DenseSet Result; + for (FileID ID : Files.Files) + Result.insert(headerResponsible(ID, SM, Includes)); + return Result; } std::vector @@ -304,12 +338,8 @@ const auto &SM = AST.getSourceManager(); auto Refs = findReferencedLocations(AST); - // FIXME(kirillbobyrev): Attribute the symbols from non self-contained - // headers to their parents while the information about respective - // SourceLocations and FileIDs is not lost. It is not possible to do that - // later when non-guarded headers included multiple times will get same - // HeaderID. - auto ReferencedFileIDs = findReferencedFiles(Refs, SM); + auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(), + AST.getSourceManager()); auto ReferencedHeaders = translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM); return getUnused(AST, ReferencedHeaders); diff --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp --- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp +++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp @@ -281,7 +281,8 @@ auto &SM = AST.getSourceManager(); auto &Includes = AST.getIncludeStructure(); - auto ReferencedFiles = findReferencedFiles(findReferencedLocations(AST), SM); + auto ReferencedFiles = + findReferencedFiles(findReferencedLocations(AST), Includes, SM); llvm::StringSet<> ReferencedFileNames; for (FileID FID : ReferencedFiles) ReferencedFileNames.insert( @@ -303,6 +304,80 @@ EXPECT_THAT(getUnused(AST, ReferencedHeaders), ::testing::IsEmpty()); } +TEST(IncludeCleaner, DistinctUnguardedInclusions) { + TestTU TU; + TU.Code = R"cpp( + #include "bar.h" + #include "foo.h" + + int LocalFoo = foo::Variable; + )cpp"; + TU.AdditionalFiles["foo.h"] = R"cpp( + #pragma once + namespace foo { + #include "unguarded.h" + } + )cpp"; + TU.AdditionalFiles["bar.h"] = R"cpp( + #pragma once + namespace bar { + #include "unguarded.h" + } + )cpp"; + TU.AdditionalFiles["unguarded.h"] = R"cpp( + constexpr int Variable = 42; + )cpp"; + + ParsedAST AST = TU.build(); + + auto ReferencedFiles = + findReferencedFiles(findReferencedLocations(AST), + AST.getIncludeStructure(), AST.getSourceManager()); + llvm::StringSet<> ReferencedFileNames; + auto &SM = AST.getSourceManager(); + for (FileID FID : ReferencedFiles) + ReferencedFileNames.insert( + SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename()); + // Note that we have uplifted the referenced files from non self-contained + // headers to header-guarded ones. + EXPECT_THAT(ReferencedFileNames.keys(), + UnorderedElementsAre(testPath("foo.h"))); +} + +TEST(IncludeCleaner, NonSelfContainedHeaders) { + TestTU TU; + TU.Code = R"cpp( + #include "foo.h" + + int LocalFoo = Variable; + )cpp"; + TU.AdditionalFiles["foo.h"] = R"cpp( + #pragma once + #include "indirection.h" + )cpp"; + TU.AdditionalFiles["indirection.h"] = R"cpp( + #include "unguarded.h" + )cpp"; + TU.AdditionalFiles["unguarded.h"] = R"cpp( + constexpr int Variable = 42; + )cpp"; + + ParsedAST AST = TU.build(); + + auto ReferencedFiles = + findReferencedFiles(findReferencedLocations(AST), + AST.getIncludeStructure(), AST.getSourceManager()); + llvm::StringSet<> ReferencedFileNames; + auto &SM = AST.getSourceManager(); + for (FileID FID : ReferencedFiles) + ReferencedFileNames.insert( + SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename()); + // Note that we have uplifted the referenced files from non self-contained + // headers to header-guarded ones. + EXPECT_THAT(ReferencedFileNames.keys(), + UnorderedElementsAre(testPath("foo.h"))); +} + } // namespace } // namespace clangd } // namespace clang