diff --git a/libcxx/utils/generate_abi_list.py b/runtimes/utils/generate_abi_list.py rename from libcxx/utils/generate_abi_list.py rename to runtimes/utils/generate_abi_list.py --- a/libcxx/utils/generate_abi_list.py +++ b/runtimes/utils/generate_abi_list.py @@ -8,44 +8,45 @@ # ===----------------------------------------------------------------------===## import argparse -import io -import libcxx.sym_check.extract -import libcxx.sym_check.util -import pprint +import json import sys +from pathlib import Path +import sym_check -def OutputFile(file): - if isinstance(file, io.IOBase): - return file - assert isinstance(file, str), "Got object {} which is not a str".format(file) - return open(file, "w", newline="\n") - -def main(argv): +def main() -> int: parser = argparse.ArgumentParser( description="Extract a list of symbols from a shared library." ) parser.add_argument( - "library", metavar="LIB", type=str, help="The library to extract symbols from." + "library", metavar="LIB", type=Path, help="The library to extract symbols from." ) parser.add_argument( "-o", "--output", dest="output", - type=OutputFile, - default=sys.stdout, - help="The output file to write the symbols to. It is overwritten if it already exists. " + type=Path, + help="The output file to write the symbols to. It is overwritten if it already exists. " 
"If no file is specified, the results are written to standard output.", ) - args = parser.parse_args(argv) + args = parser.parse_args() - symbols = libcxx.sym_check.extract.extract_symbols(args.library) - symbols, _ = libcxx.sym_check.util.filter_stdlib_symbols(symbols) + # extract symbols + symbols = sym_check.extract_symbols(args.library.resolve()) + if symbols is None: + print("unable to extract symbols.", file=sys.stderr) + return 1 - lines = [pprint.pformat(sym, width=99999) for sym in symbols] - args.output.writelines("\n".join(sorted(lines))) + # output symbols + symbols_json = json.dumps(symbols, indent=4) + if args.output is None: + sys.stdout.writelines(symbols_json + "\n") + else: + with args.output.open("w", encoding="utf-8") as output: + output.writelines(symbols_json + "\n") + return 0 if __name__ == "__main__": - main(sys.argv[1:]) + sys.exit(main()) diff --git a/libcxx/utils/sym_diff.py b/runtimes/utils/get_sym_diff.py rename from libcxx/utils/sym_diff.py rename to runtimes/utils/get_sym_diff.py --- a/libcxx/utils/sym_diff.py +++ b/runtimes/utils/get_sym_diff.py @@ -9,23 +9,17 @@ """ sym_diff - Compare two symbol lists and output the differences. """ - -from argparse import ArgumentParser +import argparse import sys -from libcxx.sym_check import diff, util +from pathlib import Path + +import sym_check -def main(): - parser = ArgumentParser( +def main() -> int: + parser = argparse.ArgumentParser( description="Extract a list of symbols from a shared library." 
) - parser.add_argument( - "--names-only", - dest="names_only", - help="Only print symbol names", - action="store_true", - default=False, - ) parser.add_argument( "--removed-only", dest="removed_only", @@ -33,13 +27,6 @@ action="store_true", default=False, ) - parser.add_argument( - "--only-stdlib-symbols", - dest="only_stdlib", - help="Filter all symbols not related to the stdlib", - action="store_true", - default=False, - ) parser.add_argument( "--strict", dest="strict", @@ -51,10 +38,9 @@ "-o", "--output", dest="output", - help="The output file. stdout is used if not given", - type=str, - action="store", - default=None, + type=Path, + help="The output file to write the symbols to. It is overwritten if it already exists. " + "If no file is specified, the results are written to standard output.", ) parser.add_argument( "--demangle", dest="demangle", action="store_true", default=False @@ -62,38 +48,41 @@ parser.add_argument( "old_syms", metavar="old-syms", - type=str, + type=Path, help="The file containing the old symbol list or a library", ) parser.add_argument( "new_syms", metavar="new-syms", - type=str, + type=Path, help="The file containing the new symbol list or a library", ) args = parser.parse_args() - old_syms_list = util.extract_or_load(args.old_syms) - new_syms_list = util.extract_or_load(args.new_syms) + old_symbols = sym_check.extract_symbols(args.old_syms) + if old_symbols is None: + print(f"Unable to extract symbols from {args.old_syms}.", file=sys.stderr) + return 1 - if args.only_stdlib: - old_syms_list, _ = util.filter_stdlib_symbols(old_syms_list) - new_syms_list, _ = util.filter_stdlib_symbols(new_syms_list) + new_symbols = sym_check.extract_symbols(args.new_syms) + if new_symbols is None: + print(f"Unable to extract symbols from {args.new_syms}.", file=sys.stderr) + return 1 - added, removed, changed = diff.diff(old_syms_list, new_syms_list) - if args.removed_only: - added = {} - report, is_break, is_different = diff.report_diff( - added, 
removed, changed, names_only=args.names_only, demangle=args.demangle + added, removed, changed = sym_check.diff(old_symbols, new_symbols) + + diff_report, is_different = sym_check.report_diff( + added, removed, changed, args.removed_only, args.demangle # type: ignore ) - if args.output is None: - print(report) + + if args.output: + with args.output.open("w", encoding="utf-8") as f: + f.write(diff_report) else: - with open(args.output, "w") as f: - f.write(report + "\n") - exit_code = 1 if is_break or (args.strict and is_different) else 0 - sys.exit(exit_code) + print(diff_report) + + return 1 if is_different and args.strict else 0 if __name__ == "__main__": - main() + sys.exit(main()) diff --git a/runtimes/utils/sym_check/ToolOutput.py b/runtimes/utils/sym_check/ToolOutput.py new file mode 100644 --- /dev/null +++ b/runtimes/utils/sym_check/ToolOutput.py @@ -0,0 +1,9 @@ +from dataclasses import dataclass +from typing import List + + +@dataclass +class ToolOutput: + returncode: int + command: List[str] + data: str diff --git a/runtimes/utils/sym_check/__init__.py b/runtimes/utils/sym_check/__init__.py new file mode 100644 --- /dev/null +++ b/runtimes/utils/sym_check/__init__.py @@ -0,0 +1,34 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## +from sym_check.diff import ( + added_symbols, + changed_symbols, + diff, + removed_symbols, + report_diff, +) +from sym_check.extract import SymbolExtractor, extract_symbols, load_symbols + + +"""ABI symbol checker""" + +__author__ = "Ian Michael Francis" +__email__ = "IanMichaelFrancis@outlook.com" +__versioninfo__ = (1, 0, 0) +__version__ = ".".join(str(v) for v in __versioninfo__) + +__all__ = [ + "added_symbols", + "changed_symbols", + "diff", + "removed_symbols", + "report_diff", + "SymbolExtractor", + "extract_symbols", + "load_symbols", +] diff --git a/runtimes/utils/sym_check/diff.py b/runtimes/utils/sym_check/diff.py new file mode 100644 --- /dev/null +++ b/runtimes/utils/sym_check/diff.py @@ -0,0 +1,114 @@ +# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80: +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## +from pathlib import Path +import shutil +from typing import Dict, List, Optional, Tuple + +from sym_check import utils +from sym_check.utils import JSON + + +def _symbol_difference(lhs: JSON, rhs: JSON) -> JSON: + lhs_names = set(((n["Name"], n["Type"]) for n in (x for x in lhs))) + rhs_names = set(((n["Name"], n["Type"]) for n in (x for x in rhs))) + diff_names = lhs_names - rhs_names + return [n for n in lhs if (n["Name"], n["Type"]) in diff_names] # type: ignore + + +def _find_by_key(sym_list: JSON, k: str) -> Optional[JSON]: + for sym in sym_list: + if sym["Name"] == k: + return sym + return None + + +def added_symbols(old: JSON, new: JSON) -> List[JSON]: + return _symbol_difference(new, old) + + +def removed_symbols(old: JSON, new: JSON) -> List[JSON]: + return _symbol_difference(old, new) + + +def changed_symbols(old: JSON, new: JSON) -> List[Tuple[JSON, JSON]]: + changed: List[Tuple[JSON, JSON]] = [] + for old_sym in old: + # symbol unchanged + if old_sym in new: + continue + + new_sym = _find_by_key(new, old_sym["Name"]) + if new_sym is not None and new_sym not in old: + # Same name on both sides but a different record: the symbol changed. + # A name missing from `new` is reported by removed_symbols(), not here. + changed.append((old_sym, new_sym)) + + return changed + + +def diff( + old: JSON, new: JSON +) -> Tuple[List[JSON], List[JSON], List[Tuple[JSON, JSON]]]: + added = added_symbols(old, new) + removed = removed_symbols(old, new) + changed = changed_symbols(old, new) + return added, removed, changed + + +def report_diff( + added_syms: JSON, + removed_syms: JSON, + changed_syms: JSON, + removed_only: bool = False, + demangle: bool = True, +) -> Tuple[str, bool]: + cppfilt: Optional[Path] = None + report = "" + if demangle is True: + _cppfilt: Optional[str] = shutil.which("c++filt") + if _cppfilt is not None: + cppfilt = Path(_cppfilt).resolve() + else: + report += "c++filt not found 
in PATH, skipping demangling.\n" + + added_count, removed_count, changed_count = 0, 0, 0 + if removed_only is False: + # check added symbols + if len(added_syms) > 0: + added_count = len(added_syms) + report += f"Symbols Added: {added_count}\n" + for sym in added_syms: + sym["Name"] = utils.demangle_symbol(sym["Name"], cppfilt) + report += f"Added {sym}\n" + + # check changed symbols + if len(changed_syms) > 0: + changed_count = len(changed_syms) + report += f"Symbols Changed: {changed_count}\n" + for old_sym, new_sym in changed_syms: + old_sym["Name"] = utils.demangle_symbol(old_sym["Name"], cppfilt) + new_sym["Name"] = utils.demangle_symbol(new_sym["Name"], cppfilt) + report += f"{old_sym['Name']}: {old_sym} -> {new_sym}\n" + + if len(removed_syms) > 0: + removed_count = len(removed_syms) + report += f"Symbols Removed: {removed_count}\n" + for sym in removed_syms: + sym["Name"] = utils.demangle_symbol(sym["Name"], cppfilt) + report += f"Removed {sym}\n" + + if added_count + removed_count + changed_count > 0: # at least one ABI break + report += "ABI BREAKAGE!: " + if removed_only: + report += f"{removed_count} removed.\n" + else: + report += f"{added_count} added, {removed_count} removed, {changed_count} changed.\n" + return report, True + + return "Symbols unchanged", False diff --git a/runtimes/utils/sym_check/extract.py b/runtimes/utils/sym_check/extract.py new file mode 100644 --- /dev/null +++ b/runtimes/utils/sym_check/extract.py @@ -0,0 +1,94 @@ +import json +import platform +import shutil +import subprocess +import sys +from pathlib import Path +from typing import Dict, List, Optional + +import magic +import yaml +from sym_check.ToolOutput import ToolOutput +from sym_check import utils +from sym_check.utils import JSON + + +class SymbolExtractor: + def __init__(self, lib: Path) -> None: + self.lib = lib.resolve() + magic_data = magic.from_file(self.lib) + if "ELF" in magic_data: + self.executable_type = "ELF" + elif "ASCII" in magic_data: + 
self.executable_type = "IFS" + + def ifs_extract( + self, output_file: Path, ifs_dir: Optional[Path] = None + ) -> ToolOutput: + if ifs_dir is None: + env_ifs = shutil.which("llvm-ifs") + if env_ifs is None: + return ToolOutput( + 1, ["which", "llvm-ifs"], "Error: Cannot find llvm-ifs in PATH" + ) + ifs_dir = Path(env_ifs).resolve() + + cmd = [ + ifs_dir.as_posix(), + self.lib.as_posix(), + f"--input-format={self.executable_type}", + f"--output-ifs={output_file.resolve()}", + ] + + llvm_ifs = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + check=False, + ) + return ToolOutput(llvm_ifs.returncode, cmd, llvm_ifs.stdout) + + +def load_symbols(file: Path) -> JSON: + symbols: Optional[JSON] = None + with file.open("r", encoding="utf-8") as f: + ifs_contents: JSON | Dict[str, str] = yaml.load( + f.read(), Loader=yaml.BaseLoader + ) + if isinstance(ifs_contents, dict): # assume llvm-ifs output + loaded_symbols = ifs_contents.get("Symbols") + assert isinstance( + loaded_symbols, list + ), f"Error: No symbols found in {file.resolve()}" + symbols = JSON(loaded_symbols) + else: + assert isinstance(ifs_contents, list) + symbols = JSON(ifs_contents) + + return symbols + + +def extract_symbols(file: Path) -> Optional[JSON]: + if not utils.is_library_file(file): + symbols = load_symbols(file) + return symbols + + assert utils.is_shared_library( + file + ), f"Error: Library {file.resolve()} is not a shared library." 
+ + extractor = SymbolExtractor(file) + output_file = Path("tmp_llvm-ifs-output.ifs") + tool_output = extractor.ifs_extract(output_file) + if tool_output.returncode != 0: + print( + f"{tool_output.data}\nFailing command: {tool_output.command}", + file=sys.stderr, + ) + return None + + symbols = load_symbols(output_file) + output_file.unlink() # delete the output file created by llvm-ifs + + return symbols diff --git a/runtimes/utils/sym_check/utils.py b/runtimes/utils/sym_check/utils.py new file mode 100644 --- /dev/null +++ b/runtimes/utils/sym_check/utils.py @@ -0,0 +1,85 @@ +import json +import platform +import shutil +import subprocess +import sys +from pathlib import Path +from typing import Dict, List, NewType, Optional, Tuple + + +class JSON(list): + # mypy does not support recursive types yet + pass + + +def demangle_symbol(symbol: str, cppfilt: Optional[Path]) -> str: + if cppfilt is None: + return symbol + + result = subprocess.run( + [cppfilt.resolve().as_posix()], input=symbol.encode(), capture_output=True + ) + if result.returncode != 0: + return symbol + return result.stdout.decode() + + +def is_mach_o(file: Path) -> bool: + with file.open("rb") as f: + magic_bytes = f.read(4) + return magic_bytes in [ + b"\xfe\xed\xfa\xce", # MH_MAGIC + b"\xce\xfa\xed\xfe", # MH_CIGAM + b"\xfe\xed\xfa\xcf", # MH_MAGIC_64 + b"\xcf\xfa\xed\xfe", # MH_CIGAM_64 + b"\xca\xfe\xba\xbe", # FAT_MAGIC + b"\xbe\xba\xfe\xca", # FAT_CIGAM + ] + + +def is_xcoff_or_big_ar(file: Path) -> bool: + with file.open("rb") as f: + magic_bytes = f.read(7) + return ( + magic_bytes[:2] in [b"\x01\xdf", b"\x01\xf7"] or magic_bytes == b"\x3cbigaf\x3e" + ) # XCOFF32 # XCOFF64 # AIX big archive ("bigaf" in angle brackets) + + +def is_elf(file: Path) -> bool: + with file.open("rb") as f: + magic_bytes = f.read(4) + return magic_bytes == b"\x7fELF" + + +def is_library_file(file: Path) -> bool: + if platform.system() == "Darwin": + return is_mach_o(file) + + if platform.system() == "AIX": + return is_xcoff_or_big_ar(file) + + return is_elf(file) + + +def 
_aix_is_shared_lib(lib_file: Path) -> bool: + """ + Check for the shared object flag in XCOFF headers of the input file or + library archive. + """ + dump = shutil.which("dump") + if dump is None: + print("ERROR: Could not find dump", file=sys.stderr) + return False + + cmd: List[str] = [dump, "-X32_64", "-ov", lib_file.as_posix()] + out = subprocess.check_output(cmd).decode() + + return out.find("SHROBJ") != -1 + + +def is_shared_library(lib_file: Path) -> bool: + return ( + _aix_is_shared_lib(lib_file) + if platform.system() == "AIX" + else lib_file.suffix == ".so" + )