diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -368,7 +368,7 @@ if (Preamble) CanonIncludes = Preamble->CanonIncludes; else - addSystemHeadersMapping(&CanonIncludes, Clang->getLangOpts()); + CanonIncludes.addSystemHeadersMapping(Clang->getLangOpts()); std::unique_ptr IWYUHandler = collectIWYUHeaderMaps(&CanonIncludes); Clang->getPreprocessor().addCommentHandler(IWYUHandler.get()); diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -78,7 +78,7 @@ } void BeforeExecute(CompilerInstance &CI) override { - addSystemHeadersMapping(&CanonIncludes, CI.getLangOpts()); + CanonIncludes.addSystemHeadersMapping(CI.getLangOpts()); SourceMgr = &CI.getSourceManager(); } diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.h b/clang-tools-extra/clangd/index/CanonicalIncludes.h --- a/clang-tools-extra/clangd/index/CanonicalIncludes.h +++ b/clang-tools-extra/clangd/index/CanonicalIncludes.h @@ -29,6 +29,8 @@ namespace clang { namespace clangd { +class StdIncludes; + /// Maps a definition location onto an #include file, based on a set of filename /// rules. /// Only const methods (i.e. mapHeader) in this class are thread safe. @@ -36,35 +38,30 @@ public: CanonicalIncludes() = default; - /// Adds a string-to-string mapping from \p Path to \p CanonicalPath. - void addMapping(llvm::StringRef Path, llvm::StringRef CanonicalPath); - - /// Maps files with last path components matching \p Suffix to \p - /// CanonicalPath. - void addPathSuffixMapping(llvm::StringRef Suffix, - llvm::StringRef CanonicalPath); - - /// Sets the canonical include for any symbol with \p QualifiedName. - /// Symbol mappings take precedence over header mappings. - void addSymbolMapping(llvm::StringRef QualifiedName, - llvm::StringRef CanonicalPath); - /// Returns the canonical include for symbol with \p QualifiedName. /// \p Header is the file the declaration was reachable from. /// Header itself will be returned if there is no relevant mapping. llvm::StringRef mapHeader(llvm::StringRef Header, llvm::StringRef QualifiedName) const; + /// Adds a string-to-string mapping from \p Path to \p CanonicalPath. + void addMapping(llvm::StringRef Path, llvm::StringRef CanonicalPath); + + /// Adds mapping for system headers and some special symbols (e.g. STL symbols + /// in need to be mapped individually). Approximately, the following + /// system headers are handled: + /// - C++ standard library e.g. bits/basic_string.h$ -> + /// - Posix library e.g. bits/pthreadtypes.h$ -> + /// - Compiler extensions, e.g. include/avx512bwintrin.h$ -> + /// The mapping is hardcoded and hand-maintained, so it might not cover all + /// headers. + void addSystemHeadersMapping(const LangOptions &Language); + private: /// A map from full include path to a canonical path. llvm::StringMap FullPathMapping; - /// A map from a suffix (one or components of a path) to a canonical path. - llvm::StringMap SuffixHeaderMapping; - /// Maximum number of path components stored in a key of SuffixHeaderMapping. - /// Used to reduce the number of lookups into SuffixHeaderMapping. - int MaxSuffixComponents = 0; - /// A map from fully qualified symbol names to header names. - llvm::StringMap SymbolMapping; + /// Precomputed mapping of std symbols. Ignored when null. + const StdIncludes *SystemSymbols = nullptr; }; /// Returns a CommentHandler that parses pragma comment on include files to @@ -76,16 +73,43 @@ std::unique_ptr collectIWYUHeaderMaps(CanonicalIncludes *Includes); -/// Adds mapping for system headers and some special symbols (e.g. STL symbols -/// in need to be mapped individually). Approximately, the following -/// system headers are handled: -/// - C++ standard library e.g. bits/basic_string.h$ -> -/// - Posix library e.g. bits/pthreadtypes.h$ -> -/// - Compiler extensions, e.g. include/avx512bwintrin.h$ -> -/// The mapping is hardcoded and hand-maintained, so it might not cover all -/// headers. -void addSystemHeadersMapping(CanonicalIncludes *Includes, - const LangOptions &Language); +/// Exposed only for unit tests, an implementation detail of CanonicalIncludes. +/// Handles mapping standard symbols and headers. +class StdIncludes { +public: + StdIncludes() = default; + + enum class SymbolsFlavour; + /// Populates the mapping for a precomputed set of symbols. This constructor + /// is expensive, avoid calling it too much. + StdIncludes(SymbolsFlavour Language); + + /// Try to find a corresponding standard library file to #include for file \p + /// Header and symbol named \p QualifiedName. + /// If the inputs are not recognized as part of the standard library, returns + /// None. + llvm::Optional + mapHeader(llvm::StringRef Header, llvm::StringRef QualifiedName) const; + + /// Sets the canonical include for any symbol with \p QualifiedName. + /// Symbol mappings take precedence over header mappings. + void addSymbolMapping(llvm::StringRef QualifiedName, + llvm::StringRef CanonicalPath); + + /// Maps files with last path components matching \p Suffix to \p + /// CanonicalPath. + void addPathSuffixMapping(llvm::StringRef Suffix, + llvm::StringRef CanonicalPath); + +private: + /// A map from fully qualified symbol names to header names. + llvm::StringMap SymbolMapping; + /// A map from a suffix (one or components of a path) to a canonical path. + llvm::StringMap SuffixHeaderMapping; + /// Maximum number of path components stored in a key of SuffixHeaderMapping. + /// Used to reduce the number of lookups into SuffixHeaderMapping. + int MaxSuffixComponents = 0; +}; } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp --- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp +++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp @@ -8,105 +8,70 @@ #include "CanonicalIncludes.h" #include "Headers.h" +#include "clang/Basic/LangOptions.h" #include "clang/Driver/Types.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" #include +#include namespace clang { namespace clangd { -namespace { -const char IWYUPragma[] = "// IWYU pragma: private, include "; -} // namespace - -void CanonicalIncludes::addPathSuffixMapping(llvm::StringRef Suffix, - llvm::StringRef CanonicalPath) { - int Components = std::distance(llvm::sys::path::begin(Suffix), - llvm::sys::path::end(Suffix)); - MaxSuffixComponents = std::max(MaxSuffixComponents, Components); - SuffixHeaderMapping[Suffix] = CanonicalPath; -} -void CanonicalIncludes::addMapping(llvm::StringRef Path, - llvm::StringRef CanonicalPath) { - FullPathMapping[Path] = CanonicalPath; -} - -void CanonicalIncludes::addSymbolMapping(llvm::StringRef QualifiedName, - llvm::StringRef CanonicalPath) { - this->SymbolMapping[QualifiedName] = CanonicalPath; +enum class StdIncludes::SymbolsFlavour { Cpp, C11, Other }; +namespace { +StdIncludes::SymbolsFlavour symbolsFlavour(const LangOptions &Language) { + if (Language.CPlusPlus) + return StdIncludes::SymbolsFlavour::Cpp; + if (Language.C11) + return StdIncludes::SymbolsFlavour::C11; + return StdIncludes::SymbolsFlavour::Other; } +const StdIncludes &getStandardSymbolMapping(const LangOptions &Opts) { + // FIXME: suffix path mappings can be reused across all three classes. + static const auto *ForCpp = new StdIncludes(StdIncludes::SymbolsFlavour::Cpp); + static const auto *ForC11 = new StdIncludes(StdIncludes::SymbolsFlavour::C11); + static const auto *ForOther = + new StdIncludes(StdIncludes::SymbolsFlavour::Other); -llvm::StringRef -CanonicalIncludes::mapHeader(llvm::StringRef Header, - llvm::StringRef QualifiedName) const { - assert(!Header.empty()); - auto SE = SymbolMapping.find(QualifiedName); - if (SE != SymbolMapping.end()) - return SE->second; - - auto MapIt = FullPathMapping.find(Header); - if (MapIt != FullPathMapping.end()) - return MapIt->second; - - int Components = 1; - for (auto It = llvm::sys::path::rbegin(Header), - End = llvm::sys::path::rend(Header); - It != End && Components <= MaxSuffixComponents; ++It, ++Components) { - auto SubPath = Header.substr(It->data() - Header.begin()); - auto MappingIt = SuffixHeaderMapping.find(SubPath); - if (MappingIt != SuffixHeaderMapping.end()) - return MappingIt->second; + switch (symbolsFlavour(Opts)) { + case StdIncludes::SymbolsFlavour::Cpp: + return *ForCpp; + case StdIncludes::SymbolsFlavour::C11: + return *ForC11; + case StdIncludes::SymbolsFlavour::Other: + return *ForOther; } - return Header; + llvm_unreachable("unhandled SymbolFlavour"); } - -std::unique_ptr -collectIWYUHeaderMaps(CanonicalIncludes *Includes) { - class PragmaCommentHandler : public clang::CommentHandler { - public: - PragmaCommentHandler(CanonicalIncludes *Includes) : Includes(Includes) {} - - bool HandleComment(Preprocessor &PP, SourceRange Range) override { - llvm::StringRef Text = - Lexer::getSourceText(CharSourceRange::getCharRange(Range), - PP.getSourceManager(), PP.getLangOpts()); - if (!Text.consume_front(IWYUPragma)) - return false; - // FIXME(ioeric): resolve the header and store actual file path. For now, - // we simply assume the written header is suitable to be #included. - Includes->addMapping(PP.getSourceManager().getFilename(Range.getBegin()), - isLiteralInclude(Text) ? Text.str() - : ("\"" + Text + "\"").str()); - return false; - } - - private: - CanonicalIncludes *const Includes; - }; - return std::make_unique(Includes); } -void addSystemHeadersMapping(CanonicalIncludes *Includes, - const LangOptions &Language) { +StdIncludes::StdIncludes(SymbolsFlavour Language) { static constexpr std::pair SymbolMap[] = { -#define SYMBOL(Name, NameSpace, Header) { #NameSpace#Name, #Header }, - #include "StdSymbolMap.inc" +#define SYMBOL(Name, NameSpace, Header) {#NameSpace #Name, #Header}, +#include "StdSymbolMap.inc" #undef SYMBOL }; static constexpr std::pair CSymbolMap[] = { -#define SYMBOL(Name, NameSpace, Header) { #Name, #Header }, - #include "CSymbolMap.inc" +#define SYMBOL(Name, NameSpace, Header) {#Name, #Header}, +#include "CSymbolMap.inc" #undef SYMBOL }; - if (Language.CPlusPlus) { + switch (Language) { + case SymbolsFlavour::Cpp: for (const auto &Pair : SymbolMap) - Includes->addSymbolMapping(Pair.first, Pair.second); - } else if (Language.C11) { + addSymbolMapping(Pair.first, Pair.second); + break; + case SymbolsFlavour::C11: for (const auto &Pair : CSymbolMap) - Includes->addSymbolMapping(Pair.first, Pair.second); + addSymbolMapping(Pair.first, Pair.second); + break; + case SymbolsFlavour::Other: + break; } - // FIXME: remove the std header mapping once we support ambiguous symbols, now - // it serves as a fallback to disambiguate: + // FIXME: remove the std header mapping once we support ambiguous symbols, + // now it serves as a fallback to disambiguate: // - symbols with multiple headers (e.g. std::move) static constexpr std::pair SystemHeaderMap[] = { {"include/__stddef_max_align_t.h", ""}, @@ -762,7 +727,97 @@ {"bits/xtitypes.h", ""}, }; for (const auto &Pair : SystemHeaderMap) - Includes->addPathSuffixMapping(Pair.first, Pair.second); + addPathSuffixMapping(Pair.first, Pair.second); +} + +llvm::Optional +StdIncludes::mapHeader(llvm::StringRef Header, + llvm::StringRef QualifiedName) const { + auto SE = SymbolMapping.find(QualifiedName); + if (SE != SymbolMapping.end()) + return llvm::StringRef(SE->second); + + int Components = 1; + for (auto It = llvm::sys::path::rbegin(Header), + End = llvm::sys::path::rend(Header); + It != End && Components <= MaxSuffixComponents; ++It, ++Components) { + auto SubPath = Header.substr(It->data() - Header.begin()); + auto MappingIt = SuffixHeaderMapping.find(SubPath); + if (MappingIt != SuffixHeaderMapping.end()) + return llvm::StringRef(MappingIt->second); + } + + return llvm::None; +} + +void StdIncludes::addSymbolMapping(llvm::StringRef QualifiedName, + llvm::StringRef CanonicalPath) { + SymbolMapping[QualifiedName] = CanonicalPath; +} + +void StdIncludes::addPathSuffixMapping(llvm::StringRef Suffix, + llvm::StringRef CanonicalPath) { + int Components = std::distance(llvm::sys::path::begin(Suffix), + llvm::sys::path::end(Suffix)); + MaxSuffixComponents = std::max(MaxSuffixComponents, Components); + SuffixHeaderMapping[Suffix] = CanonicalPath; +} + +namespace { +const char IWYUPragma[] = "// IWYU pragma: private, include "; +} // namespace + +void CanonicalIncludes::addMapping(llvm::StringRef Path, + llvm::StringRef CanonicalPath) { + FullPathMapping[Path] = CanonicalPath; +} + +llvm::StringRef +CanonicalIncludes::mapHeader(llvm::StringRef Header, + llvm::StringRef QualifiedName) const { + assert(!Header.empty()); + // Prefer a mapping from the system symbols. + if (SystemSymbols) { + if (auto Result = SystemSymbols->mapHeader(Header, QualifiedName)) + return *Result; + } + + auto MapIt = FullPathMapping.find(Header); + if (MapIt != FullPathMapping.end()) + return MapIt->second; + + return Header; +} + +std::unique_ptr +collectIWYUHeaderMaps(CanonicalIncludes *Includes) { + class PragmaCommentHandler : public clang::CommentHandler { + public: + PragmaCommentHandler(CanonicalIncludes *Includes) : Includes(Includes) {} + + bool HandleComment(Preprocessor &PP, SourceRange Range) override { + llvm::StringRef Text = + Lexer::getSourceText(CharSourceRange::getCharRange(Range), + PP.getSourceManager(), PP.getLangOpts()); + if (!Text.consume_front(IWYUPragma)) + return false; + // FIXME(ioeric): resolve the header and store actual file path. For now, + // we simply assume the written header is suitable to be #included. + Includes->addMapping(PP.getSourceManager().getFilename(Range.getBegin()), + isLiteralInclude(Text) ? Text.str() + : ("\"" + Text + "\"").str()); + return false; + } + + private: + CanonicalIncludes *const Includes; + }; + return std::make_unique(Includes); +} + +void CanonicalIncludes::addSystemHeadersMapping(const LangOptions &Language) { + assert(SystemSymbols == nullptr && "resetting the system headers mapping"); + SystemSymbols = &getStandardSymbolMapping(Language); } } // namespace clangd diff --git a/clang-tools-extra/clangd/index/IndexAction.cpp b/clang-tools-extra/clangd/index/IndexAction.cpp --- a/clang-tools-extra/clangd/index/IndexAction.cpp +++ b/clang-tools-extra/clangd/index/IndexAction.cpp @@ -140,7 +140,7 @@ std::unique_ptr CreateASTConsumer(CompilerInstance &CI, llvm::StringRef InFile) override { CI.getPreprocessor().addCommentHandler(PragmaHandler.get()); - addSystemHeadersMapping(Includes.get(), CI.getLangOpts()); + Includes->addSystemHeadersMapping(CI.getLangOpts()); if (IncludeGraphCallback != nullptr) CI.getPreprocessor().addPPCallbacks( std::make_unique(CI.getSourceManager(), IG)); diff --git a/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp b/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp --- a/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp +++ b/clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp @@ -7,17 +7,22 @@ //===----------------------------------------------------------------------===// #include "index/CanonicalIncludes.h" +#include "clang/Basic/LangOptions.h" +#include "llvm/Testing/Support/SupportHelpers.h" #include "gtest/gtest.h" namespace clang { namespace clangd { namespace { +using llvm::ValueIs; +using ::testing::Eq; + TEST(CanonicalIncludesTest, CStandardLibrary) { CanonicalIncludes CI; auto Language = LangOptions(); Language.C11 = true; - addSystemHeadersMapping(&CI, Language); + CI.addSystemHeadersMapping(Language); // Usual standard library symbols are mapped correctly. EXPECT_EQ("", CI.mapHeader("path/stdio.h", "printf")); } @@ -26,7 +31,7 @@ CanonicalIncludes CI; auto Language = LangOptions(); Language.CPlusPlus = true; - addSystemHeadersMapping(&CI, Language); + CI.addSystemHeadersMapping(Language); // Usual standard library symbols are mapped correctly. EXPECT_EQ("", CI.mapHeader("path/vector.h", "std::vector")); @@ -53,20 +58,24 @@ TEST(CanonicalIncludesTest, SymbolMapping) { // As used for standard library. - CanonicalIncludes CI; - CI.addSymbolMapping("some::symbol", ""); + StdIncludes SI; + SI.addSymbolMapping("some::symbol", ""); - EXPECT_EQ("", CI.mapHeader("foo/bar", "some::symbol")); - EXPECT_EQ("foo/bar", CI.mapHeader("foo/bar", "other::symbol")); + EXPECT_THAT(SI.mapHeader("foo/bar", "some::symbol"), + ValueIs(StringRef(""))); + EXPECT_THAT(SI.mapHeader("foo/bar", "other::symbol"), Eq(llvm::None)); } TEST(CanonicalIncludesTest, Precedence) { CanonicalIncludes CI; CI.addMapping("some/path", ""); - CI.addSymbolMapping("some::symbol", ""); + + LangOptions Language; + Language.CPlusPlus = true; + CI.addSystemHeadersMapping(Language); // Symbol mapping beats path mapping. - EXPECT_EQ("", CI.mapHeader("some/path", "some::symbol")); + EXPECT_EQ("", CI.mapHeader("some/path", "std::vector")); } } // namespace diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -973,7 +973,7 @@ CanonicalIncludes Includes; auto Language = LangOptions(); Language.CPlusPlus = true; - addSystemHeadersMapping(&Includes, Language); + Includes.addSystemHeadersMapping(Language); CollectorOpts.Includes = &Includes; runSymbolCollector("namespace std { class string {}; }", /*Main=*/""); EXPECT_THAT(Symbols,