diff --git a/llvm/utils/extract_symbols.py b/llvm/utils/extract_symbols.py --- a/llvm/utils/extract_symbols.py +++ b/llvm/utils/extract_symbols.py @@ -131,6 +131,21 @@ def aix_is_32bit_windows(lib): return False +# An approximation of identifying private symbols without actually demangling. +# This has no known false-positives as far as clang is concerned, but tons +# of false-negatives for e.g. templates, but that's enough for now. +def is_private(symbol): + # Bail on special symbols (*structors, operators), and templates, which + # require more understanding of the mangled symbol. + if symbol.startswith('??') or "?$" in symbol: + return False + # See the description of mangling further below. Catch what looks like + # function symbols. A function-class between A and F is + # private. + match = re.search('(?@. By examining the # identifier/type mangling we can decide which symbols could possibly be # required and which we can discard. @@ -141,7 +156,10 @@ # Remove calling convention decoration from names match = re.match('[_@]([^@]+)', symbol) if match: - return match.group(1) + symbol = match.group(1) + # Discard floating point/SIMD constants. + if symbol.startswith(("__xmm@", "__real@")): + return None return symbol # Function template instantiations start with ?$; keep the instantiations of # clang::Type::getAs, as some of them are explipict specializations that are @@ -165,6 +183,9 @@ # namespace doesn't exist outside of that translation unit. elif re.search('\?A(0x\w+)?@', symbol): return None + # Skip private symbols, which plugins wouldn't be able to use. + elif is_private(symbol): + return None # Keep mangled llvm:: and clang:: function symbols. How we detect these is a # bit of a mess and imprecise, but that avoids having to completely demangle # the symbol name. The outermost namespace is at the end of the identifier