diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h --- a/clang-tools-extra/clangd/IncludeCleaner.h +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -52,7 +52,7 @@ /// The output only includes things SourceManager sees as files (not macro IDs). /// This can include , etc that are not true files. llvm::DenseSet findReferencedFiles(const ReferencedLocations &Locs, - const SourceManager &SM); + const Preprocessor &PP); /// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs). /// FileIDs that are not true files ( etc) are dropped. diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -16,6 +16,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/Syntax/Tokens.h" @@ -234,20 +235,37 @@ llvm::DenseSet findReferencedFiles(const llvm::DenseSet &Locs, - const SourceManager &SM) { + const Preprocessor &PP) { std::vector Sorted{Locs.begin(), Locs.end()}; llvm::sort(Sorted); // Group by FileID. - ReferencedFiles Result(SM); + const SourceManager &SM = PP.getSourceManager(); + ReferencedFiles Files(SM); for (auto It = Sorted.begin(); It < Sorted.end();) { FileID FID = SM.getFileID(*It); - Result.add(FID, *It); + Files.add(FID, *It); // Cheaply skip over all the other locations from the same FileID. // This avoids lots of redundant Loc->File lookups for the same file. do ++It; while (It != Sorted.end() && SM.isInFileID(*It, FID)); } - return std::move(Result.Files); + // If a header is not self-contained, we consider its symbols a logical part + // of the including file. Therefore, mark the parents of all used + // non-self-contained FileIDs as used. Perform this on FileIDs rather than + // HeaderIDs, as each inclusion of a non-self-contained file is distinct. + llvm::DenseSet Result; + for (FileID ID : Files.Files) { + // Unroll the chain of non self-contained headers to get to the first one + // with the header guard. + for (const FileEntry *FE = SM.getFileEntryForID(ID); + ID != SM.getMainFileID() && FE && + !isSelfContainedHeader(PP, ID, FE);) { + ID = SM.getFileID(SM.getIncludeLoc(ID)); + FE = SM.getFileEntryForID(ID); + } + Result.insert(ID); + } + return Result; } std::vector @@ -304,12 +322,7 @@ const auto &SM = AST.getSourceManager(); auto Refs = findReferencedLocations(AST); - // FIXME(kirillbobyrev): Attribute the symbols from non self-contained - // headers to their parents while the information about respective - // SourceLocations and FileIDs is not lost. It is not possible to do that - // later when non-guarded headers included multiple times will get same - // HeaderID. - auto ReferencedFileIDs = findReferencedFiles(Refs, SM); + auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getPreprocessor()); auto ReferencedHeaders = translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM); return getUnused(AST, ReferencedHeaders); diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -324,6 +324,13 @@ /// Returns true if the given location is in a generated protobuf file. bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr); +/// Use heuristics to check whether the header is self-contained (has header +/// guard, does not rely on preprocessor state). +/// When \p ExpensiveCheck is true, this will read a portion of the file which +/// be discouraged in performance-sensitive context. +bool isSelfContainedHeader(const Preprocessor &PP, FileID FID, + const FileEntry *FE, bool ExpensiveCheck = false); + } // namespace clangd } // namespace clang #endif diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -50,6 +50,44 @@ namespace clang { namespace clangd { +namespace { + +// Is Line an #if or #ifdef directive? +static bool isIf(llvm::StringRef Line) { + Line = Line.ltrim(); + if (!Line.consume_front("#")) + return false; + Line = Line.ltrim(); + return Line.startswith("if"); +} + +// Is Line an #error directive mentioning includes? +static bool isErrorAboutInclude(llvm::StringRef Line) { + Line = Line.ltrim(); + if (!Line.consume_front("#")) + return false; + Line = Line.ltrim(); + if (!Line.startswith("error")) + return false; + return Line.contains_insensitive( + "includ"); // Matches "include" or "including". +} + +// Heuristically headers that only want to be included via an umbrella. +static bool isDontIncludeMeHeader(llvm::StringRef Content) { + llvm::StringRef Line; + // Only sniff up to 100 lines or 10KB. + Content = Content.take_front(100 * 100); + for (unsigned I = 0; I < 100 && !Content.empty(); ++I) { + std::tie(Line, Content) = Content.split('\n'); + if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first)) + return true; + } + return false; +} + +} // namespace + // Here be dragons. LSP positions use columns measured in *UTF-16 code units*! // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial. @@ -1182,5 +1220,20 @@ return SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT); } +bool isSelfContainedHeader(const Preprocessor &PP, FileID FID, + const FileEntry *FE, bool ExpensiveCheck) { + // FIXME: Should files that have been #import'd be considered + // self-contained? That's really a property of the includer, + // not of the file. + if (!PP.getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) && + !PP.getHeaderSearchInfo().hasFileBeenImported(FE)) + return false; + // This pattern indicates that a header can't be used without + // particular preprocessor state, usually set up by another header. + if (ExpensiveCheck) + return !isDontIncludeMeHeader(PP.getSourceManager().getBufferData(FID)); + return true; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -266,7 +266,7 @@ return toURI(Canonical); } } - if (!isSelfContainedHeader(FID, FE)) { + if (!isSelfContainedHeader(*PP, FID, FE, /*ExpensiveCheck=*/true)) { // A .inc or .def file is often included into a real header to define // symbols (e.g. LLVM tablegen files). if (Filename.endswith(".inc") || Filename.endswith(".def")) @@ -279,20 +279,6 @@ return toURI(FE); } - bool isSelfContainedHeader(FileID FID, const FileEntry *FE) { - // FIXME: Should files that have been #import'd be considered - // self-contained? That's really a property of the includer, - // not of the file. - if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) && - !PP->getHeaderSearchInfo().hasFileBeenImported(FE)) - return false; - // This pattern indicates that a header can't be used without - // particular preprocessor state, usually set up by another header. - if (isDontIncludeMeHeader(SM.getBufferData(FID))) - return false; - return true; - } - // Is Line an #if or #ifdef directive? static bool isIf(llvm::StringRef Line) { Line = Line.ltrim(); diff --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp --- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp +++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp @@ -281,7 +281,8 @@ auto &SM = AST.getSourceManager(); auto &Includes = AST.getIncludeStructure(); - auto ReferencedFiles = findReferencedFiles(findReferencedLocations(AST), SM); + auto ReferencedFiles = + findReferencedFiles(findReferencedLocations(AST), AST.getPreprocessor()); llvm::StringSet<> ReferencedFileNames; for (FileID FID : ReferencedFiles) ReferencedFileNames.insert( @@ -303,6 +304,52 @@ EXPECT_THAT(getUnused(AST, ReferencedHeaders), ::testing::IsEmpty()); } +TEST(IncludeCleaner, NonSelfContainedHeaders) { + TestTU TU; + TU.Code = R"cpp( + #include "bar.h" + #include "foo.h" + + int LocalFoo = foo::Variable, + LocalBar = bar::Variable; + )cpp"; + TU.AdditionalFiles["bar.h"] = R"cpp( + #pragma once + namespace bar { + #include "indirection.h" + } + )cpp"; + TU.AdditionalFiles["foo.h"] = R"cpp( + #pragma once + namespace foo { + #include "unguarded.h" + } + )cpp"; + TU.AdditionalFiles["indirection.h"] = R"cpp( + #include "unguarded.h" + )cpp"; + TU.AdditionalFiles["unguarded.h"] = R"cpp( + constexpr int Variable = 42; + )cpp"; + + ParsedAST AST = TU.build(); + + auto ReferencedFiles = + findReferencedFiles(findReferencedLocations(AST), AST.getPreprocessor()); + llvm::StringSet<> ReferencedFileNames; + auto &SM = AST.getSourceManager(); + for (FileID FID : ReferencedFiles) + ReferencedFileNames.insert( + SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename()); + // Note that we have uplifted the referenced files from non self-contained + // headers to header-guarded ones. + for (const auto &Key : ReferencedFileNames.keys()) { + llvm::outs() << "Key: " << Key << '\n'; + } + EXPECT_THAT(ReferencedFileNames.keys(), + UnorderedElementsAre(testPath("bar.h"), testPath("foo.h"))); +} + } // namespace } // namespace clangd } // namespace clang