Index: utils/sym_check/sym_check/__init__.py
===================================================================
--- /dev/null
+++ utils/sym_check/sym_check/__init__.py
@@ -0,0 +1,8 @@
+"""libcxx abi symbol checker"""
+
+__author__ = 'Eric Fiselier'
+__email__ = 'eric@efcs.ca'
+__versioninfo__ = (0, 1, 0)
+__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
+
+__all__ = ['diff', 'extract', 'util']
Index: utils/sym_check/sym_check/diff.py
===================================================================
--- /dev/null
+++ utils/sym_check/sym_check/diff.py
@@ -0,0 +1,109 @@
+# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
+"""
+diff - A set of functions for diff-ing two symbol lists.
+"""
+
+from sym_check import util
+
+
+def _symbol_difference(lhs, rhs):
+    """Return the symbols of lhs whose 'name' does not occur in rhs."""
+    rhs_names = set(n['name'] for n in rhs)
+    return [n for n in lhs if n['name'] not in rhs_names]
+
+
+def _find_by_key(sym_list, k):
+    """Return the first symbol in sym_list named k, or None."""
+    for sym in sym_list:
+        if sym['name'] == k:
+            return sym
+    return None
+
+
+def added_symbols(old, new):
+    """Return the symbols of new whose names are missing from old."""
+    return _symbol_difference(new, old)
+
+
+def removed_symbols(old, new):
+    """Return the symbols of old whose names are missing from new."""
+    return _symbol_difference(old, new)
+
+
+def changed_symbols(old, new):
+    """
+    Return (old_sym, new_sym) pairs for every name found in both lists
+    whose symbol dictionaries compare unequal.
+    """
+    # Index new by name once. setdefault keeps the first occurrence, which
+    # is the entry a linear _find_by_key scan would return.
+    new_by_name = {}
+    for sym in new:
+        new_by_name.setdefault(sym['name'], sym)
+    changed = []
+    for old_sym in old:
+        new_sym = new_by_name.get(old_sym['name'])
+        # Plain inequality: the cmp() builtin does not exist in Python 3.
+        if new_sym is not None and old_sym != new_sym:
+            changed.append((old_sym, new_sym))
+    return changed
+
+
+def diff(old, new):
+    """Return the (added, removed, changed) lists between old and new."""
+    added = added_symbols(old, new)
+    removed = removed_symbols(old, new)
+    changed = changed_symbols(old, new)
+    return added, removed, changed
+
+
+def diff_files(old_f, new_f):
+    """Read two symbol list files and return diff() of their contents."""
+    old = util.read_syms_from_file(old_f)
+    new = util.read_syms_from_file(new_f)
+    return diff(old, new)
+
+
+def report_diff(added_syms, removed_syms, changed_syms, names_only=False):
+    """
+    Build a human readable report for a set of symbol differences.
+
+    Changed symbols are listed and counted only when names_only is false.
+    Returns a (report, abi_break) tuple; abi_break is 1 when a symbol was
+    removed (or changed, unless names_only is set) and 0 otherwise, so it
+    can be used directly as a process exit status.
+    """
+    report = ''
+    for sym in added_syms:
+        report += ('Symbol added: %s\n %s\n' %
+                   (util.demangle_symbol(sym['name']), sym))
+    for sym in removed_syms:
+        report += ('SYMBOL REMOVED: %s\n %s\n' %
+                   (util.demangle_symbol(sym['name']), sym))
+    if not names_only:
+        for old_sym, new_sym in changed_syms:
+            old_str = '\n OLD SYMBOL: %s' % old_sym
+            new_str = '\n NEW SYMBOL: %s' % new_sym
+            report += ('SYMBOL CHANGED: %s%s%s\n' %
+                       (util.demangle_symbol(old_sym['name']),
+                        old_str, new_str))
+
+    added = bool(added_syms)
+    # Keep this a real boolean: 'abi_break or len(...)' would leak the number
+    # of changed symbols into the returned status and so into the exit code.
+    abi_break = bool(removed_syms)
+    if not names_only:
+        abi_break = abi_break or bool(changed_syms)
+    if added or abi_break:
+        report += 'Summary\n'
+        report += ' Added: %d\n' % len(added_syms)
+        report += ' Removed: %d\n' % len(removed_syms)
+        if not names_only:
+            report += ' Changed: %d\n' % len(changed_syms)
+        if not abi_break:
+            report += 'Symbols added.'
+        else:
+            report += 'ABI BREAKAGE: SYMBOLS ADDED OR REMOVED!'
+    else:
+        report += 'Symbols match.'
+    return report, int(abi_break)
Index: utils/sym_check/sym_check/extract.py
===================================================================
--- /dev/null
+++ utils/sym_check/sym_check/extract.py
@@ -0,0 +1,149 @@
+# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
+"""
+extract - A set of functions that extract symbol lists from shared libraries.
+"""
+import distutils.spawn
+import sys
+
+from sym_check import util
+
+
+class Extractor(object):
+    """
+    Extractor - Extract symbol lists from libraries using nm.
+    """
+    POSIX_NM = 0
+    LLVM_NM = 1
+
+    @staticmethod
+    def choose_nm():
+        """
+        Search for the nm executable and return the path and type.
+
+        llvm-nm is preferred over nm. Exits the process with status 1 if
+        neither can be found.
+        """
+        nm_exe = distutils.spawn.find_executable('llvm-nm')
+        if nm_exe is not None:
+            return nm_exe, Extractor.LLVM_NM
+        nm_exe = distutils.spawn.find_executable('nm')
+        if nm_exe is not None:
+            return nm_exe, Extractor.POSIX_NM
+        # Report on stderr so the message cannot end up inside a symbol list
+        # that is being written to stdout.
+        sys.stderr.write("ERROR: Could not find llvm-nm or nm\n")
+        sys.exit(1)
+
+    def __init__(self):
+        """
+        Initialize the nm executable and flags that will be used to extract
+        symbols from shared libraries.
+        """
+        self.nm_exe, self.nm_type = Extractor.choose_nm()
+        self.function_types = ['T', 'W']
+        self.object_types = ['B', 'D', 'R', 'r', 'V', 'S']
+        if self.nm_type == Extractor.LLVM_NM:
+            self.flags = ['-B', '-g', '-D', '-defined-only', '-print-size']
+            self.extract_sym = self._extract_llvm_nm
+        else:
+            self.flags = ['-P', '-g']
+            self.extract_sym = self._extract_nm
+
+    def extract(self, lib):
+        """
+        Extract symbols from a library and return the results as a list of
+        parsed symbol dictionaries, made unique and sorted by their repr.
+
+        Raises RuntimeError if nm exits with a non-zero status.
+        """
+        cmd = [self.nm_exe] + self.flags + [lib]
+        out, _, exit_code = util.execute_command_verbose(cmd)
+        if exit_code != 0:
+            raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
+        fmt_syms = (self.extract_sym(line)
+                    for line in out.splitlines() if line.strip())
+        # Cast symbol to string.
+        final_syms = (repr(s) for s in fmt_syms if self._want_sym(s))
+        # Make unique and sort strings.
+        tmp_list = sorted(set(final_syms))
+        # Cast string back to symbol.
+        return util.read_syms_from_list(tmp_list)
+
+    def _extract_nm(self, sym_str):
+        """
+        Parse one line of 'nm -P' output into a symbol dictionary.
+
+        Returns None for lines with fewer than two columns.
+        """
+        bits = sym_str.split()
+        # Everything we want has at least two columns.
+        if len(bits) < 2:
+            return None
+        new_sym = {
+            'name': bits[0],
+            'type': bits[1]
+        }
+        new_sym = self._transform_sym_type(new_sym)
+        # NM types which we want to save the size for.
+        if new_sym['type'] == 'OBJECT' and len(bits) > 3:
+            new_sym['size'] = int(bits[3], 16)
+        return new_sym
+
+    def _extract_llvm_nm(self, sym_str):
+        """
+        Parse one line of 'llvm-nm -B' output into a symbol dictionary.
+
+        The name and type are the last two columns; on a four column line
+        the second column is read as the hexadecimal size of an OBJECT.
+        Returns None for lines that do not have three or four columns.
+        """
+        bits = sym_str.split()
+        # Skip unexpected lines instead of asserting (asserts vanish under
+        # python -O); _want_sym discards the resulting None.
+        if len(bits) not in (3, 4):
+            return None
+        new_sym = {
+            'name': bits[-1],
+            'type': bits[-2]
+        }
+        new_sym = self._transform_sym_type(new_sym)
+        # Only a four column line has a size column; with three columns
+        # bits[-3] is the first column and must not be read as a size.
+        if new_sym['type'] == 'OBJECT' and len(bits) == 4:
+            new_sym['size'] = int(bits[-3], 16)
+        return new_sym
+
+    @staticmethod
+    def _want_sym(sym):
+        """
+        Check that sym is a valid symbol that we want to keep.
+        """
+        if sym is None or len(sym) < 2:
+            return False
+        bad_types = ['U', 'w']
+        return (sym['type'] not in bad_types
+                and sym['name'].startswith(('_Z', '__Z')))
+
+    @staticmethod
+    def _transform_sym_type(sym):
+        """
+        Map the nm single letter output for type to either FUNC or OBJECT.
+        If the type is not recognized it is left unchanged.
+        """
+        func_types = ['T', 'W', 't']
+        obj_types = ['B', 'D', 'R', 'V', 'S']
+        if sym['type'] in func_types:
+            sym['type'] = 'FUNC'
+        elif sym['type'] in obj_types:
+            sym['type'] = 'OBJECT'
+        return sym
+
+
+def extract(lib_file):
+    """
+    Extract and return a list of symbols extracted from a dynamic library.
+    The symbols are extracted using NM. They are then filtered and formatted.
+    Finally the symbols are made unique.
+    """
+    extractor = Extractor()
+    return extractor.extract(lib_file)
Index: utils/sym_check/sym_check/util.py
===================================================================
--- /dev/null
+++ utils/sym_check/sym_check/util.py
@@ -0,0 +1,101 @@
+import ast
+import distutils.spawn
+import signal
+import subprocess
+import sys
+
+
+def execute_command(cmd, input_str=None):
+    """
+    Execute a command, capture and return its output.
+
+    Returns an (out, err, exit_code) tuple of text output and exit status.
+    Raises KeyboardInterrupt if the child was terminated by SIGINT.
+    """
+    kwargs = {
+        'stdin': subprocess.PIPE,
+        'stdout': subprocess.PIPE,
+        'stderr': subprocess.PIPE,
+        # Exchange text, not bytes, with the child so callers can pass str
+        # input and split or format the output on Python 3 as well.
+        'universal_newlines': True,
+    }
+    p = subprocess.Popen(cmd, **kwargs)
+    out, err = p.communicate(input=input_str)
+    exitCode = p.wait()
+    if exitCode == -signal.SIGINT:
+        raise KeyboardInterrupt
+    return out, err, exitCode
+
+
+def execute_command_verbose(cmd, input_str=None):
+    """
+    Execute a command and print its output on failure.
+
+    The report is written to stderr; the (out, err, exit_code) tuple from
+    execute_command is returned either way.
+    """
+    out, err, exitCode = execute_command(cmd, input_str=input_str)
+    if exitCode != 0:
+        report = "Command: %s\n" % ' '.join(["'%s'" % a for a in cmd])
+        report += "Exit Code: %d\n" % exitCode
+        if out:
+            report += "Standard Output:\n--\n%s--" % out
+        if err:
+            report += "Standard Error:\n--\n%s--" % err
+        report += "\n\nFailed!"
+        sys.stderr.write('%s\n' % report)
+    return out, err, exitCode
+
+
+def read_syms_from_list(slist):
+    """
+    Read a list of symbols from a list of strings.
+    Each string is one symbol. Blank strings are skipped.
+    """
+    return [ast.literal_eval(l) for l in slist if l.strip()]
+
+
+def read_syms_from_file(filename):
+    """
+    Read a list of symbols in from a file.
+    """
+    with open(filename, 'r') as f:
+        data = f.read()
+    return read_syms_from_list(data.splitlines())
+
+
+def write_syms(sym_list, out=None, names_only=False):
+    """
+    Write a list of symbols to the file named by out, or to stdout when
+    out is None. With names_only set only the symbol names are written.
+    """
+    if names_only:
+        lines = ['%s\n' % sym['name'] for sym in sym_list]
+    else:
+        lines = ['%r\n' % sym for sym in sym_list]
+    out_str = ''.join(lines)
+    if out is None:
+        sys.stdout.write(out_str)
+    else:
+        with open(out, 'w') as f:
+            f.write(out_str)
+
+
+_cppfilt_exe = distutils.spawn.find_executable('c++filt')
+
+
+def demangle_symbol(symbol):
+    """
+    Return symbol demangled by c++filt. The symbol is returned unchanged
+    if c++filt is not available or exits with a non-zero status.
+    """
+    if _cppfilt_exe is None:
+        return symbol
+    out, _, exit_code = execute_command_verbose(
+        [_cppfilt_exe], input_str=symbol)
+    if exit_code != 0:
+        return symbol
+    # Drop any surrounding whitespace (such as a trailing newline) from the
+    # tool's output so the name can be embedded in a report line.
+    return out.strip()
Index: utils/sym_check/sym_diff.py
===================================================================
--- /dev/null
+++ utils/sym_check/sym_diff.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+"""
+sym_diff - Compare two symbol lists and output the differences.
+"""
+from argparse import ArgumentParser
+import sys
+from sym_check import diff
+
+
+def main():
+    """
+    Diff two symbol list files, print or write the report and exit with
+    the ABI breakage status returned by diff.report_diff.
+    """
+    parser = ArgumentParser(
+        description='Compare two symbol lists and output the differences.')
+    parser.add_argument('--names-only', dest='names_only',
+                        help='Only print symbol names',
+                        action='store_true', default=False)
+    parser.add_argument('-o', '--output', dest='output',
+                        help='The output file. stdout is used if not given',
+                        type=str, action='store', default=None)
+    parser.add_argument('old_syms', metavar='old-syms', type=str,
+                        help='The file containing the old symbol list')
+    parser.add_argument('new_syms', metavar='new-syms', type=str,
+                        help='The file containing the new symbol list')
+    args = parser.parse_args()
+
+    added, removed, changed = diff.diff_files(args.old_syms, args.new_syms)
+    report, is_break = diff.report_diff(added, removed, changed,
+                                        args.names_only)
+    if args.output is None:
+        print(report)
+    else:
+        with open(args.output, 'w') as f:
+            f.write(report + '\n')
+    sys.exit(is_break)
+
+
+if __name__ == '__main__':
+    main()
Index: utils/sym_check/sym_extract.py
===================================================================
--- /dev/null
+++ utils/sym_check/sym_extract.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+"""
+sym_extract - Extract and output a list of symbols from a shared library.
+"""
+from argparse import ArgumentParser
+from sym_check import extract, util
+
+
+def main():
+    """
+    Extract the symbols of the given library and write them to the output
+    file, or to stdout if no output file is given.
+    """
+    parser = ArgumentParser(
+        description='Extract a list of symbols from a shared library.')
+    parser.add_argument('library', metavar='shared-lib', type=str,
+                        help='The library to extract symbols from')
+    parser.add_argument('-o', '--output', dest='output',
+                        help='The output file. stdout is used if not given',
+                        type=str, action='store', default=None)
+    parser.add_argument('--names-only', dest='names_only',
+                        help='Output only the name of the symbol',
+                        action='store_true', default=False)
+    args = parser.parse_args()
+
+    if args.output is not None:
+        print('Extracting symbols from %s to %s.'
+              % (args.library, args.output))
+    syms = extract.extract(args.library)
+    util.write_syms(syms, args.output, names_only=args.names_only)
+
+
+if __name__ == '__main__':
+    main()