diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -1273,7 +1273,8 @@ loadMainFilePreambleMacros(Clang->getPreprocessor(), Input.Preamble); if (Includes) Clang->getPreprocessor().addPPCallbacks( - Includes->collect(Clang->getSourceManager())); + Includes->collect(Clang->getSourceManager(), + Clang->getPreprocessor().getHeaderSearchInfo())); if (llvm::Error Err = Action.Execute()) { log("Execute() failed when running codeComplete for {0}: {1}", Input.FileName, toString(std::move(Err))); diff --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h --- a/clang-tools-extra/clangd/Headers.h +++ b/clang-tools-extra/clangd/Headers.h @@ -21,6 +21,7 @@ #include "clang/Lex/PPCallbacks.h" #include "clang/Tooling/Inclusions/HeaderIncludes.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/Error.h" @@ -61,6 +62,7 @@ unsigned HashOffset = 0; // Byte offset from start of file to #. int HashLine = 0; // Line number containing the directive, 0-indexed. SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User; + bool SelfContained = true; llvm::Optional HeaderID; }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion &); @@ -123,7 +125,8 @@ // Returns a PPCallback that visits all inclusions in the main file and // populates the structure. - std::unique_ptr collect(const SourceManager &SM); + std::unique_ptr collect(const SourceManager &SM, + HeaderSearch &HeaderInfo); // HeaderID identifies file in the include graph. It corresponds to a // FileEntry rather than a FileID, but stays stable across preamble & main @@ -151,6 +154,9 @@ // Maps HeaderID to the ids of the files included from it. llvm::DenseMap> IncludeChildren; + // Contains HeaderIDs of all self-contained entries in the IncludeStructure. + llvm::DenseSet SelfContained; + std::vector MainFileIncludes; // We reserve HeaderID(0) for the main file and will manually check for that diff --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp --- a/clang-tools-extra/clangd/Headers.cpp +++ b/clang-tools-extra/clangd/Headers.cpp @@ -26,8 +26,9 @@ class RecordHeaders : public PPCallbacks { public: - RecordHeaders(const SourceManager &SM, IncludeStructure *Out) - : SM(SM), Out(Out) {} + RecordHeaders(const SourceManager &SM, HeaderSearch &HeaderInfo, + IncludeStructure *Out) + : SM(SM), HeaderInfo(HeaderInfo), Out(Out) {} // Record existing #includes - both written and resolved paths. Only #includes // in the main file are collected. @@ -85,10 +86,16 @@ InBuiltinFile = true; } break; - case PPCallbacks::ExitFile: + case PPCallbacks::ExitFile: { if (PrevFID == BuiltinFile) InBuiltinFile = false; + // At the file exit time HeaderSearchInfo is valid and can be used to + // determine whether the file was a self-contained header or not. + if (const FileEntry *FE = SM.getFileEntryForID(PrevFID)) + if (isSelfContainedHeader(FE, PrevFID, SM, HeaderInfo)) + Out->SelfContained.insert(*Out->getID(SM.getFileEntryForID(PrevFID))); break; + } case PPCallbacks::RenameFile: case PPCallbacks::SystemHeaderPragma: break; @@ -97,6 +104,7 @@ private: const SourceManager &SM; + HeaderSearch &HeaderInfo; // Set after entering the file. FileID BuiltinFile; // Indicates whether file is part of include stack. @@ -152,9 +160,9 @@ } std::unique_ptr -IncludeStructure::collect(const SourceManager &SM) { +IncludeStructure::collect(const SourceManager &SM, HeaderSearch &HeaderInfo) { MainFileEntry = SM.getFileEntryForID(SM.getMainFileID()); - return std::make_unique(SM, this); + return std::make_unique(SM, HeaderInfo, this); } llvm::Optional diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h --- a/clang-tools-extra/clangd/IncludeCleaner.h +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -52,6 +52,7 @@ /// The output only includes things SourceManager sees as files (not macro IDs). /// This can include , etc that are not true files. llvm::DenseSet findReferencedFiles(const ReferencedLocations &Locs, + const IncludeStructure &Includes, const SourceManager &SM); /// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs). diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -8,6 +8,7 @@ #include "IncludeCleaner.h" #include "Config.h" +#include "Headers.h" #include "ParsedAST.h" #include "Protocol.h" #include "SourceCode.h" @@ -16,6 +17,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/Syntax/Tokens.h" @@ -234,20 +236,35 @@ llvm::DenseSet findReferencedFiles(const llvm::DenseSet &Locs, - const SourceManager &SM) { + const IncludeStructure &Includes, const SourceManager &SM) { std::vector Sorted{Locs.begin(), Locs.end()}; llvm::sort(Sorted); // Group by FileID. - ReferencedFiles Result(SM); + ReferencedFiles Files(SM); for (auto It = Sorted.begin(); It < Sorted.end();) { FileID FID = SM.getFileID(*It); - Result.add(FID, *It); + Files.add(FID, *It); // Cheaply skip over all the other locations from the same FileID. // This avoids lots of redundant Loc->File lookups for the same file. do ++It; while (It != Sorted.end() && SM.isInFileID(*It, FID)); } - return std::move(Result.Files); + // If a header is not self-contained, we consider its symbols a logical part + // of the including file. Therefore, mark the parents of all used + // non-self-contained FileIDs as used. Perform this on FileIDs rather than + // HeaderIDs, as each inclusion of a non-self-contained file is distinct. + llvm::DenseSet Result; + for (FileID ID : Files.Files) { + // Unroll the chain of non self-contained headers to get to the first one + // with the header guard. + for (const FileEntry *FE = SM.getFileEntryForID(ID); + ID != SM.getMainFileID() && FE && + !Includes.SelfContained.contains(*Includes.getID(FE)); + ID = SM.getFileID(SM.getIncludeLoc(ID)), FE = SM.getFileEntryForID(ID)) + ; + Result.insert(ID); + } + return Result; } std::vector @@ -304,12 +321,8 @@ const auto &SM = AST.getSourceManager(); auto Refs = findReferencedLocations(AST); - // FIXME(kirillbobyrev): Attribute the symbols from non self-contained - // headers to their parents while the information about respective - // SourceLocations and FileIDs is not lost. It is not possible to do that - // later when non-guarded headers included multiple times will get same - // HeaderID. - auto ReferencedFileIDs = findReferencedFiles(Refs, SM); + auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(), + AST.getSourceManager()); auto ReferencedHeaders = translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM); return getUnused(AST, ReferencedHeaders); diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -447,7 +447,8 @@ // Otherwise we would collect the replayed includes again... // (We can't *just* use the replayed includes, they don't have Resolved path). Clang->getPreprocessor().addPPCallbacks( - Includes.collect(Clang->getSourceManager())); + Includes.collect(Clang->getSourceManager(), + Clang->getPreprocessor().getHeaderSearchInfo())); // Copy over the macros in the preamble region of the main file, and combine // with non-preamble macros below. MainFileMacros Macros; diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -23,6 +23,7 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" +#include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" @@ -98,6 +99,7 @@ CanonIncludes.addSystemHeadersMapping(CI.getLangOpts()); LangOpts = &CI.getLangOpts(); SourceMgr = &CI.getSourceManager(); + HeaderInfo = &CI.getPreprocessor().getHeaderSearchInfo(); } std::unique_ptr createPPCallbacks() override { @@ -105,7 +107,7 @@ "SourceMgr and LangOpts must be set at this point"); return std::make_unique( - Includes.collect(*SourceMgr), + Includes.collect(*SourceMgr, *HeaderInfo), std::make_unique( std::make_unique(*SourceMgr, Macros), collectPragmaMarksCallback(*SourceMgr, Marks))); @@ -140,6 +142,7 @@ std::unique_ptr IWYUHandler = nullptr; const clang::LangOptions *LangOpts = nullptr; const SourceManager *SourceMgr = nullptr; + HeaderSearch *HeaderInfo = nullptr; }; // Represents directives other than includes, where basic textual information is @@ -286,7 +289,7 @@ const auto &SM = Clang->getSourceManager(); Preprocessor &PP = Clang->getPreprocessor(); IncludeStructure Includes; - PP.addPPCallbacks(Includes.collect(SM)); + PP.addPPCallbacks(Includes.collect(SM, PP.getHeaderSearchInfo())); ScannedPreamble SP; SP.Bounds = Bounds; PP.addPPCallbacks( diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -21,6 +21,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" +#include "clang/Lex/HeaderSearch.h" #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/StringRef.h" @@ -324,6 +325,12 @@ /// Returns true if the given location is in a generated protobuf file. bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr); +/// Use heuristics to check whether the header is self-contained (has header +/// guard, does not rely on preprocessor state). This will read a portion of the +/// file which might be discouraged in performance-sensitive context. +bool isSelfContainedHeader(const FileEntry *FE, FileID ID, + const SourceManager &SM, HeaderSearch &HeaderInfo); + } // namespace clangd } // namespace clang #endif diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -50,6 +50,46 @@ namespace clang { namespace clangd { +namespace { + +// Is Line an #if or #ifdef directive? +// FIXME: This makes headers with #ifdef LINUX/WINDOWS/MACOS marked as non +// self-contained and is probably not what we want. +bool isIf(llvm::StringRef Line) { + Line = Line.ltrim(); + if (!Line.consume_front("#")) + return false; + Line = Line.ltrim(); + return Line.startswith("if"); +} + +// Is Line an #error directive mentioning includes? +bool isErrorAboutInclude(llvm::StringRef Line) { + Line = Line.ltrim(); + if (!Line.consume_front("#")) + return false; + Line = Line.ltrim(); + if (!Line.startswith("error")) + return false; + return Line.contains_insensitive( + "includ"); // Matches "include" or "including". +} + +// Heuristically headers that only want to be included via an umbrella. +bool isDontIncludeMeHeader(llvm::StringRef Content) { + llvm::StringRef Line; + // Only sniff up to 100 lines or 10KB. + Content = Content.take_front(100 * 100); + for (unsigned I = 0; I < 100 && !Content.empty(); ++I) { + std::tie(Line, Content) = Content.split('\n'); + if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first)) + return true; + } + return false; +} + +} // namespace + // Here be dragons. LSP positions use columns measured in *UTF-16 code units*! // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial. @@ -1182,5 +1222,18 @@ return SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT); } +bool isSelfContainedHeader(const FileEntry *FE, FileID FID, + const SourceManager &SM, HeaderSearch &HeaderInfo) { + // FIXME: Should files that have been #import'd be considered + // self-contained? That's really a property of the includer, + // not of the file. + if (!HeaderInfo.isFileMultipleIncludeGuarded(FE) && + !HeaderInfo.hasFileBeenImported(FE)) + return false; + // This pattern indicates that a header can't be used without + // particular preprocessor state, usually set up by another header. + return !isDontIncludeMeHeader(SM.getBufferData(FID)); +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -266,7 +266,8 @@ return toURI(Canonical); } } - if (!isSelfContainedHeader(FID, FE)) { + if (!isSelfContainedHeader(FE, FID, PP->getSourceManager(), + PP->getHeaderSearchInfo())) { // A .inc or .def file is often included into a real header to define // symbols (e.g. LLVM tablegen files). if (Filename.endswith(".inc") || Filename.endswith(".def")) @@ -278,54 +279,6 @@ // Standard case: just insert the file itself. return toURI(FE); } - - bool isSelfContainedHeader(FileID FID, const FileEntry *FE) { - // FIXME: Should files that have been #import'd be considered - // self-contained? That's really a property of the includer, - // not of the file. - if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) && - !PP->getHeaderSearchInfo().hasFileBeenImported(FE)) - return false; - // This pattern indicates that a header can't be used without - // particular preprocessor state, usually set up by another header. - if (isDontIncludeMeHeader(SM.getBufferData(FID))) - return false; - return true; - } - - // Is Line an #if or #ifdef directive? - static bool isIf(llvm::StringRef Line) { - Line = Line.ltrim(); - if (!Line.consume_front("#")) - return false; - Line = Line.ltrim(); - return Line.startswith("if"); - } - - // Is Line an #error directive mentioning includes? - static bool isErrorAboutInclude(llvm::StringRef Line) { - Line = Line.ltrim(); - if (!Line.consume_front("#")) - return false; - Line = Line.ltrim(); - if (!Line.startswith("error")) - return false; - return Line.contains_insensitive( - "includ"); // Matches "include" or "including". - } - - // Heuristically headers that only want to be included via an umbrella. - static bool isDontIncludeMeHeader(llvm::StringRef Content) { - llvm::StringRef Line; - // Only sniff up to 100 lines or 10KB. - Content = Content.take_front(100 * 100); - for (unsigned I = 0; I < 100 && !Content.empty(); ++I) { - std::tie(Line, Content) = Content.split('\n'); - if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first)) - return true; - } - return false; - } }; // Return the symbol location of the token at \p TokLoc. diff --git a/clang-tools-extra/clangd/unittests/HeadersTests.cpp b/clang-tools-extra/clangd/unittests/HeadersTests.cpp --- a/clang-tools-extra/clangd/unittests/HeadersTests.cpp +++ b/clang-tools-extra/clangd/unittests/HeadersTests.cpp @@ -81,7 +81,8 @@ Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])); IncludeStructure Includes; Clang->getPreprocessor().addPPCallbacks( - Includes.collect(Clang->getSourceManager())); + Includes.collect(Clang->getSourceManager(), + Clang->getPreprocessor().getHeaderSearchInfo())); EXPECT_FALSE(Action.Execute()); Action.EndSourceFile(); return Includes; @@ -363,6 +364,38 @@ EXPECT_THAT(collectIncludes().MainFileIncludes, Contains(AllOf(IncludeLine(2), Written("")))); } + +TEST_F(HeadersTest, SelfContainedHeaders) { + // Including through non-builtin file has no effects. + FS.Files[MainFile] = R"cpp( +#include "includeguarded.h" +#include "nonguarded.h" +#include "pp_depend.h" +#include "pragmaguarded.h" +)cpp"; + FS.Files["pragmaguarded.h"] = R"cpp( +#pragma once +)cpp"; + FS.Files["includeguarded.h"] = R"cpp( +#ifndef INCLUDE_GUARDED_H +#define INCLUDE_GUARDED_H +void foo(); +#endif // INCLUDE_GUARDED_H +)cpp"; + FS.Files["nonguarded.h"] = R"cpp( +)cpp"; + FS.Files["pp_depend.h"] = R"cpp( + #ifndef REQUIRED_PP_DIRECTIVE + # error You have to have PP directive set to include this one! + #endif +)cpp"; + + auto Includes = collectIncludes(); + EXPECT_THAT(Includes.SelfContained, + UnorderedElementsAre(getID("pragmaguarded.h", Includes), + getID("includeguarded.h", Includes))); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp --- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp +++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp @@ -281,7 +281,9 @@ auto &SM = AST.getSourceManager(); auto &Includes = AST.getIncludeStructure(); - auto ReferencedFiles = findReferencedFiles(findReferencedLocations(AST), SM); + auto ReferencedFiles = + findReferencedFiles(findReferencedLocations(AST), + AST.getIncludeStructure(), AST.getSourceManager()); llvm::StringSet<> ReferencedFileNames; for (FileID FID : ReferencedFiles) ReferencedFileNames.insert( @@ -303,6 +305,52 @@ EXPECT_THAT(getUnused(AST, ReferencedHeaders), ::testing::IsEmpty()); } +TEST(IncludeCleaner, NonSelfContainedHeaders) { + TestTU TU; + TU.Code = R"cpp( + #include "bar.h" + #include "foo.h" + + int LocalFoo = foo::Variable, + LocalBar = bar::Variable; + )cpp"; + TU.AdditionalFiles["bar.h"] = R"cpp( + #pragma once + namespace bar { + #include "indirection.h" + } + )cpp"; + TU.AdditionalFiles["foo.h"] = R"cpp( + #ifndef FOO_H + #define FOO_H + namespace foo { + #include "unguarded.h" + } + #endif // FOO_H + )cpp"; + TU.AdditionalFiles["indirection.h"] = R"cpp( + #include "unguarded.h" + )cpp"; + TU.AdditionalFiles["unguarded.h"] = R"cpp( + constexpr int Variable = 42; + )cpp"; + + ParsedAST AST = TU.build(); + + auto ReferencedFiles = + findReferencedFiles(findReferencedLocations(AST), + AST.getIncludeStructure(), AST.getSourceManager()); + llvm::StringSet<> ReferencedFileNames; + auto &SM = AST.getSourceManager(); + for (FileID FID : ReferencedFiles) + ReferencedFileNames.insert( + SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename()); + // Note that we have uplifted the referenced files from non self-contained + // headers to header-guarded ones. + EXPECT_THAT(ReferencedFileNames.keys(), + UnorderedElementsAre(testPath("bar.h"), testPath("foo.h"))); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/PreambleTests.cpp b/clang-tools-extra/clangd/unittests/PreambleTests.cpp --- a/clang-tools-extra/clangd/unittests/PreambleTests.cpp +++ b/clang-tools-extra/clangd/unittests/PreambleTests.cpp @@ -83,7 +83,8 @@ } IncludeStructure Includes; Clang->getPreprocessor().addPPCallbacks( - Includes.collect(Clang->getSourceManager())); + Includes.collect(Clang->getSourceManager(), + Clang->getPreprocessor().getHeaderSearchInfo())); if (llvm::Error Err = Action.Execute()) { ADD_FAILURE() << "failed to execute action: " << std::move(Err); return {};