Index: clangd/ClangdUnit.h =================================================================== --- clangd/ClangdUnit.h +++ clangd/ClangdUnit.h @@ -48,6 +48,7 @@ struct PreambleData { PreambleData(PrecompiledPreamble Preamble, std::vector Diags, IncludeStructure Includes, + std::vector MainFileMacros, std::unique_ptr StatCache, CanonicalIncludes CanonIncludes); @@ -57,6 +58,10 @@ // Processes like code completions and go-to-definitions will need #include // information, and their compile action skips preamble range. IncludeStructure Includes; + // Macros defined in the preamble section of the main file. + // Users care about headers vs main-file, not preamble vs non-preamble. + // These should be treated as main-file entities e.g. for code completion. + std::vector MainFileMacros; // Cache of FS operations performed when building the preamble. // When reusing a preamble, this cache can be consumed to save IO. std::unique_ptr StatCache; Index: clangd/ClangdUnit.cpp =================================================================== --- clangd/ClangdUnit.cpp +++ clangd/ClangdUnit.cpp @@ -20,6 +20,8 @@ #include "index/Index.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" @@ -94,6 +96,36 @@ std::vector TopLevelDecls; }; +class CollectMainFileMacros : public PPCallbacks { +public: + explicit CollectMainFileMacros(const SourceManager &SM, + std::vector *Out) + : SM(SM), Out(Out) {} + + void FileChanged(SourceLocation Loc, FileChangeReason, + SrcMgr::CharacteristicKind, FileID Prev) { + InMainFile = SM.isWrittenInMainFile(Loc); + } + + void MacroDefined(const Token &MacroName, const MacroDirective *MD) { + assert(MacroName.is(tok::identifier)); + if (InMainFile) + MainFileMacros.insert(MacroName.getIdentifierInfo()->getName()); + } + + void EndOfMainFile() { + for (const auto& Entry : MainFileMacros) + Out->push_back(Entry.getKey()); + llvm::sort(*Out); + } + + private: + const SourceManager &SM; + bool InMainFile = true; + llvm::StringSet<> MainFileMacros; + std::vector *Out; +}; + class CppFilePreambleCallbacks : public PreambleCallbacks { public: CppFilePreambleCallbacks(PathRef File, PreambleParsedCallback ParsedCallback) @@ -103,6 +135,10 @@ IncludeStructure takeIncludes() { return std::move(Includes); } + std::vector takeMainFileMacros() { + return std::move(MainFileMacros); + } + CanonicalIncludes takeCanonicalIncludes() { return std::move(CanonIncludes); } void AfterExecute(CompilerInstance &CI) override { @@ -118,7 +154,9 @@ std::unique_ptr createPPCallbacks() override { assert(SourceMgr && "SourceMgr must be set at this point"); - return collectIncludeStructureCallback(*SourceMgr, &Includes); + return llvm::make_unique( + collectIncludeStructureCallback(*SourceMgr, &Includes), + llvm::make_unique(*SourceMgr, &MainFileMacros)); } CommentHandler *getCommentHandler() override { @@ -131,6 +169,7 @@ PreambleParsedCallback ParsedCallback; IncludeStructure Includes; CanonicalIncludes CanonIncludes; + std::vector MainFileMacros; std::unique_ptr IWYUHandler = nullptr; SourceManager *SourceMgr = nullptr; }; @@ -459,11 +498,13 @@ PreambleData::PreambleData(PrecompiledPreamble Preamble, std::vector Diags, IncludeStructure Includes, + std::vector MainFileMacros, std::unique_ptr StatCache, CanonicalIncludes CanonIncludes) : Preamble(std::move(Preamble)), Diags(std::move(Diags)), - Includes(std::move(Includes)), StatCache(std::move(StatCache)), - CanonIncludes(std::move(CanonIncludes)) {} + Includes(std::move(Includes)), MainFileMacros(std::move(MainFileMacros)), + StatCache(std::move(StatCache)), CanonIncludes(std::move(CanonIncludes)) { +} ParsedAST::ParsedAST(std::shared_ptr Preamble, std::unique_ptr Clang, @@ -542,7 +583,8 @@ std::vector Diags = PreambleDiagnostics.take(); return std::make_shared( std::move(*BuiltPreamble), std::move(Diags), - SerializedDeclsCollector.takeIncludes(), std::move(StatCache), + SerializedDeclsCollector.takeIncludes(), + SerializedDeclsCollector.takeMainFileMacros(), std::move(StatCache), SerializedDeclsCollector.takeCanonicalIncludes()); } else { elog("Could not build a preamble for file {0}", FileName); Index: clangd/CodeComplete.cpp =================================================================== --- clangd/CodeComplete.cpp +++ clangd/CodeComplete.cpp @@ -44,6 +44,8 @@ #include "clang/Format/Format.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendActions.h" +#include "clang/Lex/ExternalPreprocessorSource.h" +#include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "clang/Sema/DeclSpec.h" @@ -1017,6 +1019,23 @@ llvm::IntrusiveRefCntPtr VFS; }; +void loadMainFilePreambleMacros(const Preprocessor &PP, + const PreambleData &Preamble) { + // The ExternalPreprocessorSource has our macros, if we know where to look. + // We can read all the macros using PreambleMacros->ReadDefinedMacros(), + // but this includes transitively included files, so may deserialize a lot. + ExternalPreprocessorSource *PreambleMacros = PP.getExternalSource(); + // As we have the names of the macros, we can look up their IdentifierInfo + // and then use this to load just the macros we want. + IdentifierInfoLookup *PreambleIdentifiers = + PP.getIdentifierTable().getExternalIdentifierLookup(); + if (!PreambleIdentifiers || !PreambleMacros) + return; + for (const auto& MacroName : Preamble.MainFileMacros) + if (auto *II = PreambleIdentifiers->get(MacroName)) + PreambleMacros->updateOutOfDateIdentifier(*II); +} + // Invokes Sema code completion on a file. // If \p Includes is set, it will be updated based on the compiler invocation. bool semaCodeComplete(std::unique_ptr Consumer, @@ -1058,9 +1077,9 @@ // However, if we're completing *inside* the preamble section of the draft, // overriding the preamble will break sema completion. Fortunately we can just // skip all includes in this case; these completions are really simple. - bool CompletingInPreamble = - ComputePreambleBounds(*CI->getLangOpts(), ContentsBuffer.get(), 0).Size > - Input.Offset; + PreambleBounds PreambleRegion = + ComputePreambleBounds(*CI->getLangOpts(), ContentsBuffer.get(), 0); + bool CompletingInPreamble = PreambleRegion.Size > Input.Offset; // NOTE: we must call BeginSourceFile after prepareCompilerInstance. Otherwise // the remapped buffers do not get freed. IgnoreDiagnostics DummyDiagsConsumer; @@ -1078,6 +1097,14 @@ Input.FileName); return false; } + // Macros can be defined within the preamble region of the main file. + // They don't fall nicely into our index/Sema dichotomy: + // - they're not indexed for completion (they're not available across files) + // - but Sema code complete won't see them: as part of the preamble, they're + // deserialized only when mentioned. + // Force them to be deserialized so SemaCodeComplete sees them. + if (Input.Preamble) + loadMainFilePreambleMacros(Clang->getPreprocessor(), *Input.Preamble); if (Includes) Clang->getPreprocessor().addPPCallbacks( collectIncludeStructureCallback(Clang->getSourceManager(), Includes)); Index: clangd/index/Symbol.h =================================================================== --- clangd/index/Symbol.h +++ clangd/index/Symbol.h @@ -204,10 +204,13 @@ /// This is a deep copy: underlying strings will be owned by the slab. void insert(const Symbol &S); - /// Returns the symbol with an ID, if it exists. Valid until next insert(). + /// Removes the symbol with an ID, if it exists. + void erase(const SymbolID &ID) { Symbols.erase(ID); } + + /// Returns the symbol with an ID, if it exists. Valid until insert/remove. const Symbol *find(const SymbolID &ID) { - auto I = SymbolIndex.find(ID); - return I == SymbolIndex.end() ? nullptr : &Symbols[I->second]; + auto I = Symbols.find(ID); + return I == Symbols.end() ? nullptr : &I->second; } /// Consumes the builder to finalize the slab. @@ -217,9 +220,8 @@ llvm::BumpPtrAllocator Arena; /// Intern table for strings. Contents are on the arena. llvm::UniqueStringSaver UniqueStrings; - std::vector Symbols; /// Values are indices into Symbols vector. - llvm::DenseMap SymbolIndex; + llvm::DenseMap Symbols; }; private: Index: clangd/index/Symbol.cpp =================================================================== --- clangd/index/Symbol.cpp +++ clangd/index/Symbol.cpp @@ -48,27 +48,23 @@ } void SymbolSlab::Builder::insert(const Symbol &S) { - auto R = SymbolIndex.try_emplace(S.ID, Symbols.size()); - if (R.second) { - Symbols.push_back(S); - own(Symbols.back(), UniqueStrings); - } else { - auto &Copy = Symbols[R.first->second] = S; - own(Copy, UniqueStrings); - } + own(Symbols[S.ID] = S, UniqueStrings); } SymbolSlab SymbolSlab::Builder::build() && { - Symbols = {Symbols.begin(), Symbols.end()}; // Force shrink-to-fit. - // Sort symbols so the slab can binary search over them. - llvm::sort(Symbols, + // Sort symbols into vector so the slab can binary search over them. + std::vector SortedSymbols; + SortedSymbols.reserve(Symbols.size()); + for (auto &Entry : Symbols) + SortedSymbols.push_back(std::move(Entry.second)); + llvm::sort(SortedSymbols, [](const Symbol &L, const Symbol &R) { return L.ID < R.ID; }); // We may have unused strings from overwritten symbols. Build a new arena. llvm::BumpPtrAllocator NewArena; llvm::UniqueStringSaver Strings(NewArena); - for (auto &S : Symbols) + for (auto &S : SortedSymbols) own(S, Strings); - return SymbolSlab(std::move(NewArena), std::move(Symbols)); + return SymbolSlab(std::move(NewArena), std::move(SortedSymbols)); } } // namespace clangd Index: clangd/index/SymbolCollector.h =================================================================== --- clangd/index/SymbolCollector.h +++ clangd/index/SymbolCollector.h @@ -125,6 +125,12 @@ // All Symbols collected from the AST. SymbolSlab::Builder Symbols; + // File IDs for Symbol.IncludeHeaders. + // The final spelling is calculated in finish(). + llvm::DenseMap IncludeFiles; + void setIncludeLocation(const Symbol &S, SourceLocation); + // Indexed macros, to be erased if they turned out to be include guards. + llvm::DenseSet IndexedMacros; // All refs collected from the AST. // Only symbols declared in preamble (from #include) and referenced from the // main file will be included. Index: clangd/index/SymbolCollector.cpp =================================================================== --- clangd/index/SymbolCollector.cpp +++ clangd/index/SymbolCollector.cpp @@ -24,6 +24,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" #include "clang/Index/IndexSymbol.h" +#include "clang/Index/IndexingAction.h" #include "clang/Index/USRGeneration.h" #include "clang/Lex/Preprocessor.h" #include "llvm/Support/Casting.h" @@ -351,9 +352,8 @@ const auto &SM = PP->getSourceManager(); auto DefLoc = MI->getDefinitionLoc(); - // Header guards are not interesting in index. Builtin macros don't have - // useful locations and are not needed for code completions. - if (MI->isUsedForHeaderGuard() || MI->isBuiltinMacro()) + // Builtin macros don't have useful locations and aren't needed in completion. + if (MI->isBuiltinMacro()) return true; // Skip main-file symbols if we are not collecting them. @@ -407,22 +407,25 @@ std::string Signature; std::string SnippetSuffix; getSignature(*CCS, &Signature, &SnippetSuffix); - - std::string Include; - if (Opts.CollectIncludePath && shouldCollectIncludePath(S.SymInfo.Kind)) { - if (auto Header = getIncludeHeader( - Name->getName(), SM.getDecomposedExpansionLoc(DefLoc).first)) - Include = std::move(*Header); - } S.Signature = Signature; S.CompletionSnippetSuffix = SnippetSuffix; - if (!Include.empty()) - S.IncludeHeaders.emplace_back(Include, 1); + IndexedMacros.insert(Name); + setIncludeLocation(S, DefLoc); Symbols.insert(S); return true; } +void SymbolCollector::setIncludeLocation(const Symbol &S, + SourceLocation Loc) { + if (Opts.CollectIncludePath) + if (shouldCollectIncludePath(S.SymInfo.Kind)) + // Use the expansion location to get the #include header since this is + // where the symbol is exposed. + IncludeFiles[S.ID] = + PP->getSourceManager().getDecomposedExpansionLoc(Loc).first; +} + void SymbolCollector::finish() { // At the end of the TU, add 1 to the refcount of all referenced symbols. auto IncRef = [this](const SymbolID &ID) { @@ -439,6 +442,14 @@ } if (Opts.CollectMacro) { assert(PP); + // First, drop header guards. We can't identify these until EOF. + for (const IdentifierInfo *II : IndexedMacros) { + if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo()) + if (auto ID = getSymbolID(*II, MI, PP->getSourceManager())) + if (MI->isUsedForHeaderGuard()) + Symbols.erase(*ID); + } + // Now increment refcounts. for (const IdentifierInfo *II : ReferencedMacros) { if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo()) if (auto ID = getSymbolID(*II, MI, PP->getSourceManager())) @@ -446,6 +457,21 @@ } } + // Fill in IncludeHeaders. + // We delay this until end of TU so header guards are all resolved. + // Symbols in slabs aren' mutable, so insert() has to walk all the strings :-( + llvm::SmallString<256> QName; + for (const auto &Entry : IncludeFiles) + if (const Symbol *S = Symbols.find(Entry.first)) { + QName = S->Scope; + QName.append(S->Name); + if (auto Header = getIncludeHeader(QName, Entry.second)) { + Symbol NewSym = *S; + NewSym.IncludeHeaders.push_back({*Header, 1}); + Symbols.insert(NewSym); + } + } + const auto &SM = ASTCtx->getSourceManager(); llvm::DenseMap URICache; auto GetURI = [&](FileID FID) -> llvm::Optional { @@ -463,7 +489,7 @@ } return Found->second; }; - + // Populate Refs slab from DeclRefs. if (auto MainFileURI = GetURI(SM.getMainFileID())) { for (const auto &It : DeclRefs) { if (auto ID = getSymbolID(It.first)) { @@ -491,6 +517,7 @@ DeclRefs.clear(); FilesToIndexCache.clear(); HeaderIsSelfContainedCache.clear(); + IncludeFiles.clear(); } const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID, @@ -555,17 +582,6 @@ std::string ReturnType = getReturnType(*CCS); S.ReturnType = ReturnType; - std::string Include; - if (Opts.CollectIncludePath && shouldCollectIncludePath(S.SymInfo.Kind)) { - // Use the expansion location to get the #include header since this is - // where the symbol is exposed. - if (auto Header = getIncludeHeader( - QName, SM.getDecomposedExpansionLoc(ND.getLocation()).first)) - Include = std::move(*Header); - } - if (!Include.empty()) - S.IncludeHeaders.emplace_back(Include, 1); - llvm::Optional TypeStorage; if (S.Flags & Symbol::IndexedForCodeCompletion) { TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion); @@ -574,6 +590,7 @@ } Symbols.insert(S); + setIncludeLocation(S, ND.getLocation()); return Symbols.find(S.ID); } Index: clangd/unittests/CodeCompleteTests.cpp =================================================================== --- clangd/unittests/CodeCompleteTests.cpp +++ clangd/unittests/CodeCompleteTests.cpp @@ -2073,19 +2073,28 @@ UnorderedElementsAre(Named("Clangd_Macro_Test"))); } -TEST(CompletionTest, NoMacroFromPreambleIfIndexIsSet) { +TEST(CompletionTest, MacroFromPreamble) { + MockFSProvider FS; + MockCompilationDatabase CDB; + std::string FooHeader = testPath("foo.h"); + FS.Files[FooHeader] = "#define CLANGD_PREAMBLE_HEADER x\n"; + IgnoreDiagnostics DiagConsumer; + ClangdServer Server(CDB, FS, DiagConsumer, ClangdServer::optsForTest()); auto Results = completions( - R"cpp(#define CLANGD_PREAMBLE x + R"cpp(#include "foo.h" + #define CLANGD_PREAMBLE_MAIN x int x = 0; #define CLANGD_MAIN x void f() { CLANGD_^ } )cpp", {func("CLANGD_INDEX")}); - // Index is overriden in code completion options, so the preamble symbol is - // not seen. - EXPECT_THAT(Results.Completions, UnorderedElementsAre(Named("CLANGD_MAIN"), - Named("CLANGD_INDEX"))); + // We should get results from the main file, including the preamble section. + // However no results from included files (the index should cover them). + EXPECT_THAT(Results.Completions, + UnorderedElementsAre(Named("CLANGD_PREAMBLE_MAIN"), + Named("CLANGD_MAIN"), + Named("CLANGD_INDEX"))); } TEST(CompletionTest, DeprecatedResults) { Index: clangd/unittests/SymbolCollectorTests.cpp =================================================================== --- clangd/unittests/SymbolCollectorTests.cpp +++ clangd/unittests/SymbolCollectorTests.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/VirtualFileSystem.h" +#include "gmock/gmock-matchers.h" #include "gmock/gmock-more-matchers.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -34,9 +35,9 @@ using testing::_; using testing::AllOf; using testing::Contains; +using testing::Each; using testing::ElementsAre; using testing::Field; -using testing::IsEmpty; using testing::Not; using testing::Pair; using testing::UnorderedElementsAre; @@ -1028,6 +1029,26 @@ Not(IncludeHeader())))); } +// Features that depend on header-guards are fragile. Header guards are only +// recognized when the file ends, so we have to defer checking for them. +TEST_F(SymbolCollectorTest, HeaderGuardDetected) { + CollectorOpts.CollectIncludePath = true; + CollectorOpts.CollectMacro = true; + runSymbolCollector(R"cpp( + #ifndef HEADER_GUARD_ + #define HEADER_GUARD_ + + // Symbols are seen before the header guard is complete. + #define MACRO + int decl(); + + #endif // Header guard is recognized here. + )cpp", + ""); + EXPECT_THAT(Symbols, Not(Contains(QName("HEADER_GUARD_")))); + EXPECT_THAT(Symbols, Each(IncludeHeader())); +} + TEST_F(SymbolCollectorTest, NonModularHeader) { auto TU = TestTU::withHeaderCode("int x();"); EXPECT_THAT(TU.headerSymbols(), ElementsAre(IncludeHeader()));