Index: clangd/index/SymbolCollector.h =================================================================== --- clangd/index/SymbolCollector.h +++ clangd/index/SymbolCollector.h @@ -19,6 +19,7 @@ #include "clang/Index/IndexSymbol.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Regex.h" #include namespace clang { @@ -117,6 +118,15 @@ bool IsMainFileSymbol); void addDefinition(const NamedDecl &, const Symbol &DeclSymbol); + llvm::Optional getIncludeHeader(llvm::StringRef QName, FileID); + bool isSelfContainedHeader(FileID); + // Heuristic to detect headers that aren't self-contained, usually because + // they need to be included via an umbrella header. e.g. GTK matches this. + llvm::Regex DontIncludeMe = { + "^[ \t]*#[ \t]*if.*\n" // An #if, #ifndef etc directive, then + "[ \t]*#[ \t]*error.*include", // an #error directive mentioning "include" + llvm::Regex::Newline}; + // All Symbols collected from the AST. SymbolSlab::Builder Symbols; // All refs collected from the AST. @@ -141,6 +151,7 @@ llvm::DenseMap CanonicalDecls; // Cache whether to index a file or not. llvm::DenseMap FilesToIndexCache; + llvm::DenseMap HeaderIsSelfContainedCache; }; } // namespace clangd Index: clangd/index/SymbolCollector.cpp =================================================================== --- clangd/index/SymbolCollector.cpp +++ clangd/index/SymbolCollector.cpp @@ -128,48 +128,6 @@ } } -bool isSelfContainedHeader(FileID FID, const SourceManager &SM, - const Preprocessor &PP) { - const FileEntry *FE = SM.getFileEntryForID(FID); - if (!FE) - return false; - return PP.getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE); -} - -/// Gets a canonical include (URI of the header or
or "header") for -/// header of \p FID (which should usually be the *expansion* file). -/// Returns None if includes should not be inserted for this file. -llvm::Optional -getIncludeHeader(llvm::StringRef QName, const SourceManager &SM, - const Preprocessor &PP, FileID FID, - const SymbolCollector::Options &Opts) { - const FileEntry *FE = SM.getFileEntryForID(FID); - if (!FE || FE->getName().empty()) - return llvm::None; - llvm::StringRef Filename = FE->getName(); - // If a file is mapped by canonical headers, use that mapping, regardless - // of whether it's an otherwise-good header (header guards etc). - if (Opts.Includes) { - llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName); - // If we had a mapping, always use it. - if (Canonical.startswith("<") || Canonical.startswith("\"")) - return Canonical.str(); - if (Canonical != Filename) - return toURI(SM, Canonical, Opts); - } - if (!isSelfContainedHeader(FID, SM, PP)) { - // A .inc or .def file is often included into a real header to define - // symbols (e.g. LLVM tablegen files). - if (Filename.endswith(".inc") || Filename.endswith(".def")) - return getIncludeHeader(QName, SM, PP, - SM.getFileID(SM.getIncludeLoc(FID)), Opts); - // Conservatively refuse to insert #includes to files without guards. - return llvm::None; - } - // Standard case: just insert the file itself. - return toURI(SM, Filename, Opts); -} - // Return the symbol range of the token at \p TokLoc. std::pair getTokenRange(SourceLocation TokLoc, const SourceManager &SM, @@ -452,9 +410,8 @@ std::string Include; if (Opts.CollectIncludePath && shouldCollectIncludePath(S.SymInfo.Kind)) { - if (auto Header = - getIncludeHeader(Name->getName(), SM, *PP, - SM.getDecomposedExpansionLoc(DefLoc).first, Opts)) + if (auto Header = getIncludeHeader( + Name->getName(), SM.getDecomposedExpansionLoc(DefLoc).first)) Include = std::move(*Header); } S.Signature = Signature; @@ -533,6 +490,7 @@ ReferencedMacros.clear(); DeclRefs.clear(); FilesToIndexCache.clear(); + HeaderIsSelfContainedCache.clear(); } const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID, @@ -602,8 +560,7 @@ // Use the expansion location to get the #include header since this is // where the symbol is exposed. if (auto Header = getIncludeHeader( - QName, SM, *PP, - SM.getDecomposedExpansionLoc(ND.getLocation()).first, Opts)) + QName, SM.getDecomposedExpansionLoc(ND.getLocation()).first)) Include = std::move(*Header); } if (!Include.empty()) @@ -639,5 +596,57 @@ Symbols.insert(S); } +/// Gets a canonical include (URI of the header or
or "header") for +/// header of \p FID (which should usually be the *expansion* file). +/// Returns None if includes should not be inserted for this file. +llvm::Optional +SymbolCollector::getIncludeHeader(llvm::StringRef QName, FileID FID) { + const SourceManager &SM = ASTCtx->getSourceManager(); + const FileEntry *FE = SM.getFileEntryForID(FID); + if (!FE || FE->getName().empty()) + return llvm::None; + llvm::StringRef Filename = FE->getName(); + // If a file is mapped by canonical headers, use that mapping, regardless + // of whether it's an otherwise-good header (header guards etc). + if (Opts.Includes) { + llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName); + // If we had a mapping, always use it. + if (Canonical.startswith("<") || Canonical.startswith("\"")) + return Canonical.str(); + if (Canonical != Filename) + return toURI(SM, Canonical, Opts); + } + if (!isSelfContainedHeader(FID)) { + // A .inc or .def file is often included into a real header to define + // symbols (e.g. LLVM tablegen files). + if (Filename.endswith(".inc") || Filename.endswith(".def")) + return getIncludeHeader(QName, SM.getFileID(SM.getIncludeLoc(FID))); + // Conservatively refuse to insert #includes to files without guards. + return llvm::None; + } + // Standard case: just insert the file itself. + return toURI(SM, Filename, Opts); +} + +bool SymbolCollector::isSelfContainedHeader(FileID FID) { + // The real computation (which will be memoized). + auto Compute = [&] { + const SourceManager &SM = ASTCtx->getSourceManager(); + const FileEntry *FE = SM.getFileEntryForID(FID); + if (!FE) + return false; + if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE)) + return false; + if (DontIncludeMe.match(SM.getBufferData(FID))) + return false; + return true; + }; + + auto R = HeaderIsSelfContainedCache.try_emplace(FID, false); + if (R.second) + R.first->second = Compute(); + return R.first->second; +} + } // namespace clangd } // namespace clang Index: unittests/clangd/SymbolCollectorTests.cpp =================================================================== --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -272,8 +272,8 @@ Args, Factory->create(), Files.get(), std::make_shared()); - InMemoryFileSystem->addFile(TestHeaderName, 0, - llvm::MemoryBuffer::getMemBuffer(HeaderCode)); + InMemoryFileSystem->addFile( + TestHeaderName, 0, llvm::MemoryBuffer::getMemBuffer(HeaderCode)); InMemoryFileSystem->addFile(TestFileName, 0, llvm::MemoryBuffer::getMemBuffer(MainCode)); Invocation.run(); @@ -1064,8 +1064,19 @@ auto TU = TestTU::withHeaderCode("int x();"); EXPECT_THAT(TU.headerSymbols(), ElementsAre(IncludeHeader())); + // Files missing include guards aren't eligible for insertion. TU.ImplicitHeaderGuard = false; EXPECT_THAT(TU.headerSymbols(), ElementsAre(Not(IncludeHeader()))); + + // We recognize some patterns of trying to prevent insertion. + TU = TestTU::withHeaderCode(R"cpp( +#ifndef SECRET +#error "This file isn't safe to include directly" +#endif + int x(); + )cpp"); + TU.ExtraArgs.push_back("-DSECRET"); // *we're* able to include it. + EXPECT_THAT(TU.headerSymbols(), ElementsAre(Not(IncludeHeader()))); } TEST_F(SymbolCollectorTest, AvoidUsingFwdDeclsAsCanonicalDecls) {