diff --git a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp --- a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp +++ b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp @@ -8,6 +8,7 @@ #include "clang/Tooling/Inclusions/StandardLibrary.h" #include "clang/AST/Decl.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" @@ -16,8 +17,9 @@ namespace stdlib { static llvm::StringRef *HeaderNames; -static std::pair *SymbolNames; -static unsigned *SymbolHeaderIDs; +static llvm::DenseMap> + *SymbolNames; +static llvm::SmallVector *SymbolHeaderIDs; static llvm::DenseMap *HeaderIDs; // Maps symbol name -> Symbol::ID, within a namespace. using NSSymbolMap = llvm::DenseMap; @@ -29,7 +31,7 @@ #include "clang/Tooling/Inclusions/CSymbolMap.inc" #include "clang/Tooling/Inclusions/StdSymbolMap.inc" #undef SYMBOL - SymbolNames = new std::remove_reference_t[SymCount]; + SymbolNames = new std::remove_reference_t; SymbolHeaderIDs = new std::remove_reference_t[SymCount]; NamespaceSymbols = new std::remove_reference_t; @@ -46,18 +48,31 @@ return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second; }; - auto Add = [&, SymIndex(0)](llvm::StringRef Name, llvm::StringRef NS, - llvm::StringRef HeaderName) mutable { + auto Add = [&, NextAvailSymIndex(0)](llvm::StringRef Name, llvm::StringRef NS, + llvm::StringRef HeaderName) mutable { if (NS == "None") NS = ""; - SymbolNames[SymIndex] = {NS, Name}; - SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName); + bool IsNewSymbol = true; + int SymIndex = NextAvailSymIndex; + if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(NS)) { + auto It = NSSymbols->find(Name); + if (It != NSSymbols->end()) { + SymIndex = It->getSecond(); + IsNewSymbol = false; + } + } + + SymbolNames->try_emplace(SymIndex, std::make_pair(NS, Name)); + + unsigned HeaderID = AddHeader(HeaderName); + SymbolHeaderIDs[SymIndex].emplace_back(HeaderID); NSSymbolMap &NSSymbols = AddNS(NS); NSSymbols.try_emplace(Name, SymIndex); - ++SymIndex; + if (IsNewSymbol) + ++NextAvailSymIndex; }; #define SYMBOL(Name, NS, Header) Add(#Name, #NS, #Header); #include "clang/Tooling/Inclusions/CSymbolMap.inc" @@ -84,8 +99,20 @@ return Header(It->second); } llvm::StringRef Header::name() const { return HeaderNames[ID]; } -llvm::StringRef Symbol::scope() const { return SymbolNames[ID].first; } -llvm::StringRef Symbol::name() const { return SymbolNames[ID].second; } +llvm::StringRef Symbol::scope() const { + auto It = SymbolNames->find(ID); + if (It != SymbolNames->end()) { + return It->second.first; + } + return ""; +} +llvm::StringRef Symbol::name() const { + auto It = SymbolNames->find(ID); + if (It != SymbolNames->end()) { + return It->second.second; + } + return ""; +} std::optional Symbol::named(llvm::StringRef Scope, llvm::StringRef Name) { ensureInitialized(); @@ -96,9 +123,14 @@ } return std::nullopt; } -Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); } + +Header Symbol::header() const { return Header(SymbolHeaderIDs[ID][0]); } llvm::SmallVector
Symbol::headers() const { - return {header()}; // FIXME: multiple in case of ambiguity + llvm::SmallVector
result; + for (auto HeaderID : SymbolHeaderIDs[ID]) { + result.emplace_back(Header(HeaderID)); + } + return result; } Recognizer::Recognizer() { ensureInitialized(); } diff --git a/clang/tools/include-mapping/cppreference_parser.py b/clang/tools/include-mapping/cppreference_parser.py --- a/clang/tools/include-mapping/cppreference_parser.py +++ b/clang/tools/include-mapping/cppreference_parser.py @@ -93,7 +93,7 @@ abs() (int)
acos()
- Returns a list of tuple (symbol_name, relative_path_to_symbol_page, variant). + Returns a list of tuple (symbol_name, relative_path_to_symbol_page). """ symbols = [] soup = BeautifulSoup(index_page_html, "html.parser") @@ -103,13 +103,10 @@ # This accidentally accepts begin/end despite the (iterator) caption: the # (since C++11) note is first. They are good symbols, so the bug is unfixed. caption = symbol_href.next_sibling - variant = None - if isinstance(caption, NavigableString) and "(" in caption: - variant = caption.text.strip(" ()") symbol_tt = symbol_href.find("tt") if symbol_tt: symbols.append((symbol_tt.text.rstrip("<>()"), # strip any trailing <>() - symbol_href["href"], variant)) + symbol_href["href"])) return symbols @@ -118,7 +115,7 @@ return _ParseSymbolPage(f.read(), name) -def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept): +def _GetSymbols(pool, root_dir, index_page_name, namespace): """Get all symbols listed in the index page. All symbols should be in the given namespace. @@ -134,13 +131,7 @@ with open(index_page_path, "r") as f: # Read each symbol page in parallel. results = [] # (symbol_name, promise of [header...]) - for symbol_name, symbol_page_path, variant in _ParseIndexPage(f.read()): - # Variant symbols (e.g. the std::locale version of isalpha) add ambiguity. - # FIXME: use these as a fallback rather than ignoring entirely. - variants_for_symbol = variants_to_accept.get( - (namespace or "") + symbol_name, ()) - if variant and variant not in variants_for_symbol: - continue + for symbol_name, symbol_page_path in _ParseIndexPage(f.read()): path = os.path.join(root_dir, symbol_page_path) if os.path.isfile(path): results.append((symbol_name, @@ -166,13 +157,6 @@ Args: parse_pages: a list of tuples (page_root_dir, index_page_name, namespace) """ - # By default we prefer the non-variant versions, as they're more common. But - # there are some symbols, whose variant is more common. This list describes - # those symbols. - variants_to_accept = { - # std::remove<> has variant algorithm. - "std::remove": ("algorithm"), - } symbols = [] # Run many workers to process individual symbol pages under the symbol index. # Don't allow workers to capture Ctrl-C. @@ -180,8 +164,7 @@ initializer=lambda: signal.signal(signal.SIGINT, signal.SIG_IGN)) try: for root_dir, page_name, namespace in parse_pages: - symbols.extend(_GetSymbols(pool, root_dir, page_name, namespace, - variants_to_accept)) + symbols.extend(_GetSymbols(pool, root_dir, page_name, namespace)) finally: pool.terminate() pool.join() diff --git a/clang/tools/include-mapping/gen_std.py b/clang/tools/include-mapping/gen_std.py --- a/clang/tools/include-mapping/gen_std.py +++ b/clang/tools/include-mapping/gen_std.py @@ -13,12 +13,6 @@ The generated files are located in clang/include/Tooling/Inclusions. -Caveats and FIXMEs: - - only symbols directly in "std" namespace are added, we should also add std's - subnamespace symbols (e.g. chrono). - - symbols with multiple variants or defined in multiple headers aren't added, - e.g. std::move, std::swap - Usage: 1. Install BeautifulSoup dependency, see instruction: https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup @@ -104,17 +98,12 @@ os.stat(index_page_path).st_mtime).strftime('%Y-%m-%d') print(CODE_PREFIX % (args.language.upper(), cppreference_modified_date)) for symbol in symbols: - if len(symbol.headers) == 1: - # SYMBOL(unqualified_name, namespace, header) - print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace, - symbol.headers[0])) - elif len(symbol.headers) == 0: + if len(symbol.headers) == 0: sys.stderr.write("No header found for symbol %s\n" % symbol.name) else: - # FIXME: support symbols with multiple headers (e.g. std::move). - sys.stderr.write("Ambiguous header for symbol %s: %s\n" % ( - symbol.name, ', '.join(symbol.headers))) - - + for header in symbol.headers: + # SYMBOL(unqualified_name, namespace, header) + print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace, + header)) if __name__ == '__main__': main()