diff --git a/runtimes/utils/generate_abi_list.py b/runtimes/utils/generate_abi_list.py
new file mode 100755
--- /dev/null
+++ b/runtimes/utils/generate_abi_list.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# ===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# ===----------------------------------------------------------------------===##
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+from sym_check import extract
+
+
+def main() -> int:
+    """Extract the symbols of a shared library and emit them as JSON.
+
+    Returns the process exit status: 0 on success, 1 if extraction failed.
+    """
+    parser = argparse.ArgumentParser(description="Extract a list of symbols from a shared library.")
+    parser.add_argument("library", metavar="LIB", type=Path, help="The library to extract symbols from.")
+    parser.add_argument(
+        "-o",
+        "--output",
+        dest="output",
+        type=Path,
+        help="The output file to write the symbols to. It is overwritten if it already exists. "
+        "If no file is specified, the results are written to standard output.",
+    )
+    args = parser.parse_args()
+
+    # extract symbols
+    symbols = extract.extract_symbols(args.library.resolve())
+    if symbols is None:
+        print("unable to extract symbols.", file=sys.stderr)
+        return 1
+
+    # output symbols
+    # write() emits the string in one call; writelines() would treat it as an
+    # iterable of single characters.
+    symbols_json = json.dumps(symbols, indent=4) + "\n"
+    if args.output is None:
+        sys.stdout.write(symbols_json)
+    else:
+        with args.output.open("w", encoding="utf-8") as output:
+            output.write(symbols_json)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/runtimes/utils/get_sym_diff.py b/runtimes/utils/get_sym_diff.py
new file mode 100755
--- /dev/null
+++ b/runtimes/utils/get_sym_diff.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+# ===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# ===----------------------------------------------------------------------===##
+"""
+sym_diff - Compare two symbol lists and output the differences.
+"""
+import argparse
+import sys
+from pathlib import Path
+
+from sym_check import diff, extract, utils
+
+
+def main() -> int:
+    """Diff two symbol lists (or libraries) and report the differences.
+
+    Returns the process exit status: 1 if either input could not be read, or
+    if --strict was given and the symbol lists differ; 0 otherwise.
+    """
+    parser = argparse.ArgumentParser(description="Compare two symbol lists and output the differences.")
+    parser.add_argument(
+        "--removed-only", dest="removed_only", help="Only print removed symbols", action="store_true", default=False
+    )
+    parser.add_argument(
+        "--strict",
+        dest="strict",
+        help="Exit with a non-zero status if any symbols differ",
+        action="store_true",
+        default=False,
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        dest="output",
+        type=Path,
+        help="The output file to write the report to. It is overwritten if it already exists. "
+        "If no file is specified, the results are written to standard output.",
+    )
+    parser.add_argument(
+        "--demangle", dest="demangle", help="Demangle symbol names with c++filt", action="store_true", default=False
+    )
+    parser.add_argument(
+        "old_syms", metavar="old-syms", type=Path, help="The file containing the old symbol list or a library"
+    )
+    parser.add_argument(
+        "new_syms", metavar="new-syms", type=Path, help="The file containing the new symbol list or a library"
+    )
+    args = parser.parse_args()
+
+    # extract symbols
+    old_symbols = utils.extract_or_load(args.old_syms)
+    if old_symbols is None:
+        print(f"Unable to extract symbols from {args.old_syms}.", file=sys.stderr)
+        return 1
+
+    new_symbols = utils.extract_or_load(args.new_syms)
+    if new_symbols is None:
+        print(f"Unable to extract symbols from {args.new_syms}.", file=sys.stderr)
+        return 1
+
+    # get diff
+    added, removed, changed = diff.diff(old_symbols, new_symbols)
+
+    # report diff
+    # Pass the flags by keyword: positionally, args.demangle would land in
+    # report_diff's removed_only parameter and --removed-only would be ignored.
+    report, is_different = diff.report_diff(
+        added, removed, changed, removed_only=args.removed_only, demangle=args.demangle
+    )
+    if args.output:
+        with args.output.open("w", encoding="utf-8") as f:
+            f.write(report)
+    else:
+        print(report)
+
+    if is_different and args.strict:
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/runtimes/utils/sym_check/ToolOutput.py b/runtimes/utils/sym_check/ToolOutput.py
new file mode 100644
--- /dev/null
+++ b/runtimes/utils/sym_check/ToolOutput.py
@@ -0,0 +1,11 @@
+from dataclasses import dataclass
+from typing import List
+
+
+@dataclass
+class ToolOutput:
+    """Result of running an external tool."""
+
+    returncode: int  # exit status of the command
+    command: List[str]  # argv that was (or would have been) executed
+    data: str  # captured output of the command, or an error message
diff --git a/runtimes/utils/sym_check/__init__.py b/runtimes/utils/sym_check/__init__.py
new file mode 100644
--- /dev/null
+++ b/runtimes/utils/sym_check/__init__.py
@@ -0,0 +1,16 @@
+# ===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# ===----------------------------------------------------------------------===##
+
+"""ABI symbol checker"""
+
+__author__ = "Ian Michael Francis"
+__email__ = "IanMichaelFrancis@outlook.com"
+__versioninfo__ = (1, 0, 0)
+__version__ = ".".join(str(v) for v in __versioninfo__)
+
+__all__ = ["diff", "extract", "utils"]
diff --git a/runtimes/utils/sym_check/diff.py b/runtimes/utils/sym_check/diff.py
new file mode 100644
--- /dev/null
+++ b/runtimes/utils/sym_check/diff.py
@@ -0,0 +1,125 @@
+# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
+# ===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# ===----------------------------------------------------------------------===##
+"""
+diff - A set of functions for diff-ing two symbol lists.
+"""
+from pathlib import Path
+import shutil
+from typing import Dict, List, Optional, Tuple
+
+from sym_check import utils
+from sym_check.utils import Json
+
+SymsList = List[Dict[str, str]]
+
+
+def _symbol_difference(lhs: Json, rhs: Json):
+    """Return the symbols of lhs whose (Name, Type) pair does not occur in rhs."""
+    rhs_keys = {(sym["Name"], sym["Type"]) for sym in rhs}
+    return [sym for sym in lhs if (sym["Name"], sym["Type"]) not in rhs_keys]
+
+
+def _find_by_key(sym_list, k):
+    """Return the first symbol in sym_list named k, or None if there is none."""
+    for sym in sym_list:
+        if sym["Name"] == k:
+            return sym
+    return None
+
+
+def added_symbols(old: Json, new: Json):
+    """Return the symbols present in new but not in old."""
+    return _symbol_difference(new, old)
+
+
+def removed_symbols(old: Json, new: Json):
+    """Return the symbols present in old but not in new."""
+    return _symbol_difference(old, new)
+
+
+def changed_symbols(old: Json, new: Json):
+    """Return (old_sym, new_sym) pairs for symbols whose name is kept but whose entry differs."""
+    changed: List[Tuple[Dict[str, str], Dict[str, str]]] = []
+    for old_sym in old:
+        if old_sym in new:
+            continue
+
+        new_sym = _find_by_key(new, old_sym["Name"])
+        if not (new_sym is None or new_sym in old or old_sym == new_sym):
+            changed.append((old_sym, new_sym))
+    return changed
+
+
+def diff(old: Json, new: Json):
+    """Return the (added, removed, changed) symbols between old and new."""
+    added = added_symbols(old, new)
+    removed = removed_symbols(old, new)
+    changed = changed_symbols(old, new)
+    return added, removed, changed
+
+
+def report_diff(
+    added_syms: SymsList,
+    removed_syms: SymsList,
+    changed_syms: SymsList,
+    removed_only: bool = False,
+    demangle: bool = True,
+) -> Tuple[str, bool]:
+    """Render a textual report of the differences between two symbol lists.
+
+    changed_syms holds (old_sym, new_sym) pairs. When removed_only is set,
+    added and changed symbols are left out of the report. When demangle is set
+    and c++filt is on PATH, symbol names are demangled for display; the input
+    lists are not modified.
+
+    Returns (report, is_different).
+    """
+    demangle_warning = ""
+    cppfilt: Optional[Path] = None
+    if demangle:
+        cppfilt_path = shutil.which("c++filt")
+        if cppfilt_path is not None:
+            cppfilt = Path(cppfilt_path).resolve()
+        else:
+            # Keep the mangled names instead of handing None to demangle_symbol.
+            demangle_warning = "c++filt not found in PATH, skipping demangling.\n"
+
+    def _display(sym: Dict[str, str]) -> Dict[str, str]:
+        # Demangle into a copy so the caller's symbol dicts stay untouched.
+        if cppfilt is None:
+            return sym
+        return {**sym, "Name": utils.demangle_symbol(sym["Name"], cppfilt)}
+
+    lines: List[str] = []
+    added_count, removed_count, changed_count = 0, 0, 0
+    if added_syms and not removed_only:
+        added_count = len(added_syms)
+        lines.append(f"Symbols Added: {added_count}\n")
+        lines.extend(f"Added {_display(sym)}\n" for sym in added_syms)
+
+    if removed_syms:
+        removed_count = len(removed_syms)
+        lines.append(f"Symbols Removed: {removed_count}\n")
+        lines.extend(f"Removed {_display(sym)}\n" for sym in removed_syms)
+
+    if changed_syms and not removed_only:
+        changed_count = len(changed_syms)
+        lines.append(f"Symbols Changed: {changed_count}\n")
+        for old_sym, new_sym in changed_syms:
+            old_shown, new_shown = _display(old_sym), _display(new_sym)
+            lines.append(f"{old_shown['Name']}: {old_shown} -> {new_shown}\n")
+
+    if not lines:
+        return "Symbols unchanged", False
+
+    if removed_only:
+        summary = f"{removed_count} removed.\n"
+    else:
+        summary = f"{added_count} added, {removed_count} removed, {changed_count} changed.\n"
+    return demangle_warning + "".join(lines) + "ABI BREAKAGE!: " + summary, True
diff --git a/runtimes/utils/sym_check/extract.py b/runtimes/utils/sym_check/extract.py
new file mode 100644
--- /dev/null
+++ b/runtimes/utils/sym_check/extract.py
@@ -0,0 +1,144 @@
+import platform
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+import magic
+import yaml
+from sym_check.ToolOutput import ToolOutput
+from sym_check.utils import Json
+
+
+class SymbolExtractor:
+    """Runs llvm-ifs on a library to dump its symbols to an IFS file."""
+
+    def __init__(self, lib: Path) -> None:
+        """Record the library path and pick the llvm-ifs input format.
+
+        Raises ValueError if the libmagic description mentions neither ELF nor ASCII.
+        """
+        self.lib = lib.resolve()
+        magic_data = magic.from_file(self.lib)
+        if "ELF" in magic_data:
+            self.executable_type = "ELF"
+        elif "ASCII" in magic_data:
+            self.executable_type = "IFS"
+        else:
+            # Fail here with a clear message instead of an AttributeError on
+            # executable_type when the llvm-ifs command line is built.
+            raise ValueError(f"Unsupported file type for {self.lib}: {magic_data}")
+
+    def ifs_extract(self, output_file: Path, ifs_dir: Optional[Path] = None) -> ToolOutput:
+        """Run llvm-ifs on the library, writing the IFS output to output_file.
+
+        ifs_dir is the path to the llvm-ifs executable; when None it is looked
+        up in PATH. Returns the exit status, command line and combined
+        stdout/stderr of the run.
+        """
+        if ifs_dir is None:
+            _env_ifs = shutil.which("llvm-ifs")
+            if _env_ifs is None:
+                return ToolOutput(1, ["which", "llvm-ifs"], "Error: Cannot find llvm-ifs in PATH")
+            ifs_dir = Path(_env_ifs).resolve()
+
+        cmd = [
+            ifs_dir.as_posix(),
+            self.lib.as_posix(),
+            f"--input-format={self.executable_type}",
+            f"--output-ifs={output_file.resolve()}",
+        ]
+
+        llvm_ifs = subprocess.run(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            check=False,
+        )
+        return ToolOutput(llvm_ifs.returncode, cmd, llvm_ifs.stdout)
+
+
+def parse_symbols(file: Path) -> Tuple[Optional[Json], Optional[str]]:
+    """Read the "Symbols" list from an IFS file.
+
+    Returns (symbols, None) on success and (None, error_message) otherwise.
+    """
+    if not file.exists():
+        return None, f"Error: File {file.resolve()} does not exist"
+
+    with file.open("r", encoding="utf-8") as ifs_output:
+        contents = "".join(ifs_output.readlines()[1:])  # the first line is "--- !ifs-v1", making it invalid yaml
+        ifs_contents = yaml.safe_load(contents)
+
+    # safe_load yields None for an empty document and may yield a non-mapping.
+    symbols: Optional[Json] = ifs_contents.get("Symbols") if isinstance(ifs_contents, dict) else None
+    if symbols is None:
+        return None, f"Error: No symbols found in {file.resolve()}"
+
+    return symbols, None
+
+
+def _aix_is_shared_lib(lib_file: Path) -> bool:
+    """
+    Check for the shared object flag in XCOFF headers of the input file or
+    library archive.
+    """
+    dump = shutil.which("dump")
+    if dump is None:
+        print("ERROR: Could not find dump", file=sys.stderr)
+        return False
+
+    cmd: List[str] = [dump, "-X32_64", "-ov", lib_file.as_posix()]
+    out = subprocess.check_output(cmd).decode()
+
+    return "SHROBJ" in out
+
+
+def is_shared_library(lib_file: Path) -> bool:
+    """
+    Determine if a given library is static or shared.
+    """
+    if platform.system() == "AIX":
+        return _aix_is_shared_lib(lib_file)
+    # NOTE(review): only an exact ".so" suffix counts as shared here; versioned
+    # names such as "libfoo.so.1" and ".dylib" files are rejected - confirm intent.
+    return lib_file.suffix == ".so"
+
+
+def extract_symbols(lib_file: Path) -> Optional[Json]:
+    """Extract the symbol list of a shared library with llvm-ifs.
+
+    Returns the symbols, or None (after printing the reason to stderr) if the
+    library is not shared, has an unsupported format, or extraction fails.
+    """
+    if not is_shared_library(lib_file):
+        print(f"Error: Library {lib_file.resolve()} is not a shared library.", file=sys.stderr)
+        return None
+
+    try:
+        extractor = SymbolExtractor(lib_file)
+    except ValueError as err:
+        print(f"Error: {err}", file=sys.stderr)
+        return None
+
+    # A private temporary directory avoids clobbering files in the working
+    # directory and is removed on every path, including failures.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        output_file = Path(tmp_dir) / "llvm-ifs-output.ifs"
+        tool_output = extractor.ifs_extract(output_file)
+
+        if tool_output.returncode != 0:
+            print(tool_output.data, file=sys.stderr)
+            print(f"Failing command: {tool_output.command}", file=sys.stderr)
+            return None
+
+        symbols, parse_err = parse_symbols(output_file)
+
+    if parse_err is not None:
+        print(parse_err, file=sys.stderr)
+        return None
+
+    return symbols
diff --git a/runtimes/utils/sym_check/utils.py b/runtimes/utils/sym_check/utils.py
new file mode 100644
--- /dev/null
+++ b/runtimes/utils/sym_check/utils.py
@@ -0,0 +1,94 @@
+import json
+import platform
+import subprocess
+import sys
+from pathlib import Path
+from typing import Dict, List, NewType, Optional, Tuple
+
+
+Json = NewType("Json", List[Dict[str, str]])
+
+
+def demangle_symbol(symbol: str, cppfilt: Path) -> str:
+    """Demangle symbol with the c++filt executable at cppfilt.
+
+    Returns symbol unchanged if the tool exits with a non-zero status.
+    """
+    result = subprocess.run([cppfilt.resolve().as_posix()], input=symbol.encode(), capture_output=True)
+    if result.returncode != 0:
+        return symbol
+    # Drop any line terminator the tool appends so the name embeds cleanly in reports.
+    return result.stdout.decode().rstrip("\r\n")
+
+
+def is_mach_o(file: Path) -> bool:
+    """Return True if file starts with a Mach-O or fat-binary magic number."""
+    with file.open("rb") as f:
+        magic_bytes = f.read(4)
+    return magic_bytes in [
+        b"\xfe\xed\xfa\xce",  # MH_MAGIC
+        b"\xce\xfa\xed\xfe",  # MH_CIGAM
+        b"\xfe\xed\xfa\xcf",  # MH_MAGIC_64
+        b"\xcf\xfa\xed\xfe",  # MH_CIGAM_64
+        b"\xca\xfe\xba\xbe",  # FAT_MAGIC
+        b"\xbe\xba\xfe\xca",  # FAT_CIGAM
+    ]
+
+
+def is_xcoff_or_big_ar(file: Path) -> bool:
+    """Return True if file starts with an XCOFF or AIX big-archive magic number."""
+    with file.open("rb") as f:
+        magic_bytes = f.read(7)
+    # The XCOFF magic is a two-byte number (0x01DF / 0x01F7), so only the
+    # first two bytes are compared.
+    if magic_bytes[:2] in (b"\x01\xdf", b"\x01\xf7"):  # XCOFF32, XCOFF64
+        return True
+    # AIX big archives start with "<bigaf>\n"; the seven bytes read cover the
+    # part before the newline.
+    return magic_bytes == b"<bigaf>"
+
+
+def is_elf(file: Path) -> bool:
+    """Return True if file starts with the ELF magic number."""
+    with file.open("rb") as f:
+        magic_bytes = f.read(4)
+    return magic_bytes == b"\x7fELF"
+
+
+def is_library_file(file: Path) -> bool:
+    """Return True if file is a binary in the host platform's object format."""
+    system = platform.system()
+    if system == "Darwin":
+        return is_mach_o(file)
+
+    if system == "AIX":
+        return is_xcoff_or_big_ar(file)
+
+    return is_elf(file)
+
+
+def read_symbols_from_json(file: Path) -> Optional[Json]:
+    """Load a symbol list from a JSON file; returns None if the file is missing."""
+    if not file.exists():
+        print(f"Error: File {file.resolve()} does not exist.", file=sys.stderr)
+        return None
+
+    # Use a context manager so the file handle is closed promptly.
+    with file.open("r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def extract_or_load(file: Path) -> Optional[Json]:
+    """Return the symbols of file, extracting them if it is a library and
+    loading them as JSON otherwise. Returns None on failure."""
+    # Imported here because sym_check.extract itself imports this module.
+    from sym_check import extract
+
+    if is_library_file(file):
+        symbols = extract.extract_symbols(file)
+        if symbols is None:
+            print(f"unable to extract symbols for {file.resolve()}.", file=sys.stderr)
+            return None
+        return symbols
+
+    return read_symbols_from_json(file)