diff --git a/libcxx/utils/graph_header_deps.py b/libcxx/utils/graph_header_deps.py
--- a/libcxx/utils/graph_header_deps.py
+++ b/libcxx/utils/graph_header_deps.py
@@ -7,202 +7,205 @@
 #
 #===----------------------------------------------------------------------===##
-from argparse import ArgumentParser
+import argparse
 import os
-import shutil
-import sys
-import shlex
-import json
 import re
-import libcxx.graph as dot
-import libcxx.util
-
-def print_and_exit(msg):
-    sys.stderr.write(msg + '\n')
-    sys.exit(1)
-
-def libcxx_include_path():
-    curr_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    include_dir = os.path.join(curr_dir, 'include')
-    return include_dir
-
-def get_libcxx_headers():
-    headers = []
-    include_dir = libcxx_include_path()
-    for fname in os.listdir(include_dir):
-        f = os.path.join(include_dir, fname)
-        if not os.path.isfile(f):
-            continue
-        base, ext = os.path.splitext(fname)
-        if (ext == '' or ext == '.h') and (not fname.startswith('__') or fname == '__config'):
-            headers += [f]
-    return headers
-
-
-def rename_headers_and_remove_test_root(graph):
-    inc_root = libcxx_include_path()
-    to_remove = set()
-    for n in graph.nodes:
-        assert 'label' in n.attributes
-        l = n.attributes['label']
-        if not l.startswith('/') and os.path.exists(os.path.join('/', l)):
-            l = '/' + l
-        if l.endswith('.tmp.cpp'):
-            to_remove.add(n)
-        if l.startswith(inc_root):
-            l = l[len(inc_root):]
-        if l.startswith('/'):
-            l = l[1:]
-        n.attributes['label'] = l
-    for n in to_remove:
-        graph.removeNode(n)
-
-def remove_non_std_headers(graph):
-    inc_root = libcxx_include_path()
-    to_remove = set()
-    for n in graph.nodes:
-        test_file = os.path.join(inc_root, n.attributes['label'])
-        if not test_file.startswith(inc_root):
-            to_remove.add(n)
-    for xn in to_remove:
-        graph.removeNode(xn)
-
-class DependencyCommand(object):
-    def __init__(self, compile_commands, output_dir, new_std=None):
-        output_dir = os.path.abspath(output_dir)
-        if not os.path.isdir(output_dir):
-            print_and_exit('"%s" must point to a directory' % output_dir)
-        self.output_dir = output_dir
-        self.new_std = new_std
-        cwd,bcmd = self._get_base_command(compile_commands)
-        self.cwd = cwd
-        self.base_cmd = bcmd
-
-    def run_for_headers(self, header_list):
-        outputs = []
-        for header in header_list:
-            header_name = os.path.basename(header)
-            out = os.path.join(self.output_dir, ('%s.dot' % header_name))
-            outputs += [out]
-            cmd = self.base_cmd + ["-fsyntax-only", "-Xclang", "-dependency-dot", "-Xclang", "%s" % out, '-xc++', '-']
-            libcxx.util.executeCommandOrDie(cmd, cwd=self.cwd, input='#include <%s>\n\n' % header_name)
-        return outputs
-
-    def _get_base_command(self, command_file):
-        commands = None
-        with open(command_file, 'r') as f:
-            commands = json.load(f)
-        for compile_cmd in commands:
-            file = compile_cmd['file']
-            if not file.endswith('src/algorithm.cpp'):
-                continue
-            wd = compile_cmd['directory']
-            cmd_str = compile_cmd['command']
-            cmd = shlex.split(cmd_str)
-            out_arg = cmd.index('-o')
-            del cmd[out_arg]
-            del cmd[out_arg]
-            in_arg = cmd.index('-c')
-            del cmd[in_arg]
-            del cmd[in_arg]
-            if self.new_std is not None:
-                for f in cmd:
-                    if f.startswith('-std='):
-                        del cmd[cmd.index(f)]
-                        cmd += [self.new_std]
-                        break
-            return wd, cmd
-        print_and_exit("failed to find command to build algorithm.cpp")
-
-def post_process_outputs(outputs, libcxx_only):
-    graphs = []
-    for dot_file in outputs:
-        g = dot.DirectedGraph.fromDotFile(dot_file)
-        rename_headers_and_remove_test_root(g)
-        if libcxx_only:
-            remove_non_std_headers(g)
-        graphs += [g]
-        g.toDotFile(dot_file)
-    return graphs
-
-def build_canonical_names(graphs):
-    canonical_names = {}
-    next_idx = 0
-    for g in graphs:
-        for n in g.nodes:
-            if n.attributes['label'] not in canonical_names:
-                name = 'header_%d' % next_idx
-                next_idx += 1
-                canonical_names[n.attributes['label']] = name
-    return canonical_names
-
-
-
-class CanonicalGraphBuilder(object):
-    def __init__(self, graphs):
-        self.graphs = list(graphs)
-        self.canonical_names = build_canonical_names(graphs)
-
-    def build(self):
-        self.canonical = dot.DirectedGraph('all_headers')
-        for k,v in self.canonical_names.iteritems():
-            n = dot.Node(v, edges=[], attributes={'shape': 'box', 'label': k})
-            self.canonical.addNode(n)
-        for g in self.graphs:
-            self._merge_graph(g)
-        return self.canonical
-
-    def _merge_graph(self, g):
-        for n in g.nodes:
-            new_name = self.canonical.getNodeByLabel(n.attributes['label']).id
-            for e in n.edges:
-                to_node = self.canonical.getNodeByLabel(e.attributes['label']).id
-                self.canonical.addEdge(new_name, to_node)
-
-
-def main():
-    parser = ArgumentParser(
-        description="Generate a graph of libc++ header dependencies")
-    parser.add_argument(
-        '-v', '--verbose', dest='verbose', action='store_true', default=False)
-    parser.add_argument(
-        '-o', '--output', dest='output', required=True,
-        help='The output file. stdout is used if not given',
-        type=str, action='store')
-    parser.add_argument(
-        '--no-compile', dest='no_compile', action='store_true', default=False)
-    parser.add_argument(
-        '--libcxx-only', dest='libcxx_only', action='store_true', default=False)
-    parser.add_argument(
-        'compile_commands', metavar='compile-commands-file',
-        help='the compile commands database')
-
-    args = parser.parse_args()
-    builder = DependencyCommand(args.compile_commands, args.output, new_std='-std=c++2a')
-    if not args.no_compile:
-        outputs = builder.run_for_headers(get_libcxx_headers())
-        graphs = post_process_outputs(outputs, args.libcxx_only)
-    else:
-        outputs = [os.path.join(args.output, l) for l in os.listdir(args.output) if not l.endswith('all_headers.dot')]
-        graphs = [dot.DirectedGraph.fromDotFile(o) for o in outputs]
-
-    canon = CanonicalGraphBuilder(graphs).build()
-    canon.toDotFile(os.path.join(args.output, 'all_headers.dot'))
-    all_graphs = graphs + [canon]
-    found_cycles = False
-    for g in all_graphs:
-        cycle_finder = dot.CycleFinder(g)
-        all_cycles = cycle_finder.findCyclesInGraph()
-        if len(all_cycles):
-            found_cycles = True
-            print("cycle in graph %s" % g.name)
-            for start, path in all_cycles:
-                print("Cycle for %s = %s" % (start, path))
-    if not found_cycles:
-        print("No cycles found")
+def is_config_header(h):
+    return os.path.basename(h) in ['__config', '__libcpp_version']
+
+
+def is_experimental_header(h):
+    return ('experimental/' in h) or ('ext/' in h)
+
+
+def is_support_header(h):
+    return '__support/' in h
+
+
+class FileEntry:
+    def __init__(self, includes, individual_linecount):
+        self.includes = includes
+        self.individual_linecount = individual_linecount
+        self.cumulative_linecount = None  # filled in later by build_graph()
+        self.is_graph_root = None  # filled in later by build_graph()
+
+
+def list_all_roots_under(root):
+    result = []
+    for root, _, files in os.walk(root):
+        for fname in files:
+            if '__support' in root:
+                pass
+            elif ('.' in fname and not fname.endswith('.h')):
+                pass
+            else:
+                result.append(root + '/' + fname)
+    return result
+
+
+def build_file_entry(fname, options):
+    assert os.path.exists(fname)
+
+    def locate_header_file(h, paths):
+        for p in paths:
+            fullname = p + '/' + h
+            if os.path.exists(fullname):
+                return fullname
+        if options.error_on_file_not_found:
+            raise RuntimeError('Header not found: %s, included by %s' % (h, fname))
+        return None
+
+    local_includes = []
+    system_includes = []
+    linecount = 0
+    with open(fname, 'r') as f:
+        for line in f.readlines():
+            linecount += 1
+            m = re.match(r'\s*#\s*include\s+"([^"]*)"', line)
+            if m is not None:
+                local_includes.append(m.group(1))
+            m = re.match(r'\s*#\s*include\s+<([^>]*)>', line)
+            if m is not None:
+                system_includes.append(m.group(1))
+
+    fully_qualified_includes = [
+        locate_header_file(h, options.search_dirs)
+        for h in system_includes
+    ] + [
+        locate_header_file(h, [os.path.dirname(fname)])
+        for h in local_includes
+    ]
+
+    return FileEntry(
+        # If file-not-found wasn't an error, then skip non-found files
+        includes = [h for h in fully_qualified_includes if h is not None],
+        individual_linecount = linecount,
+    )
+
+
+def transitive_closure_of_includes(graph, h1):
+    visited = set()
+    def explore(graph, h1):
+        if h1 not in visited:
+            visited.add(h1)
+            for h2 in graph[h1].includes:
+                explore(graph, h2)
+    explore(graph, h1)
+    return visited
+
+
+def transitively_includes(graph, h1, h2):
+    return (h1 != h2) and (h2 in transitive_closure_of_includes(graph, h1))
+
+
+def build_graph(roots, options):
+    original_roots = list(roots)
+    graph = {}
+    while roots:
+        frontier = roots
+        roots = []
+        for fname in frontier:
+            if fname not in graph:
+                graph[fname] = build_file_entry(fname, options)
+                graph[fname].is_graph_root = (fname in original_roots)
+                roots += graph[fname].includes
+    for fname, entry in graph.items():
+        entry.cumulative_linecount = sum(graph[h].individual_linecount for h in transitive_closure_of_includes(graph, fname))
+    return graph
+
+
+def get_graphviz(graph, options):
+
+    def get_friendly_id(fname):
+        i = fname.index('include/')
+        assert(i >= 0)
+        result = fname[i+8:]
+        return result
+
+    def get_decorators(fname, entry):
+        result = ''
+        if entry.is_graph_root:
+            result += ' [style=bold]'
+        if options.show_individual_line_counts and options.show_cumulative_line_counts:
+            result += ' [label="%s\\n%d indiv, %d cumul"]' % (
+                get_friendly_id(fname), entry.individual_linecount, entry.cumulative_linecount
+            )
+        elif options.show_individual_line_counts:
+            result += ' [label="%s\\n%d indiv"]' % (get_friendly_id(fname), entry.individual_linecount)
+        elif options.show_cumulative_line_counts:
+            result += ' [label="%s\\n%d cumul"]' % (get_friendly_id(fname), entry.cumulative_linecount)
+        return result

+    result = ''
+    result += 'strict digraph {\n'
+    result += ' rankdir=LR;\n'
+    result += ' layout=dot;\n\n'
+    for fname, entry in graph.items():
+        result += ' "%s"%s;\n' % (get_friendly_id(fname), get_decorators(fname, entry))
+        for h in entry.includes:
+            if any(transitively_includes(graph, i, h) for i in entry.includes) and not options.show_transitive_edges:
+                continue
+            result += ' "%s" -> "%s";\n' % (get_friendly_id(fname), get_friendly_id(h))
+    result += '}\n'
+    return result
+
 
 if __name__ == '__main__':
-    main()
+    parser = argparse.ArgumentParser(description='Produce a dependency graph of libc++ headers, in GraphViz dot format.')
+    parser.add_argument('--root', default=None, metavar='FILE', help='File or directory to be the root of the dependency graph')
+    parser.add_argument('-I', dest='search_dirs', default=[], action='append', metavar='DIR', help='Directories to search for <>-style includes')
+    parser.add_argument('--show-transitive-edges', action='store_true', help='Show edges to headers that are transitively included anyway')
+    parser.add_argument('--show-config-headers', action='store_true', help='Show the __config and __libcpp_version headers')
+    parser.add_argument('--show-experimental-headers', action='store_true', help='Show headers in the experimental/ and ext/ directories')
+    parser.add_argument('--show-support-headers', action='store_true', help='Show headers in the __support/ directory')
+    parser.add_argument('--show-individual-line-counts', action='store_true', help='Include an individual line count in each node')
+    parser.add_argument('--show-cumulative-line-counts', action='store_true', help='Include a cumulative line count in each node')
+    parser.add_argument('--error-on-file-not-found', action='store_true', help="Don't ignore failure to open an #included file")
+
+    options = parser.parse_args()
+
+    if options.root is None:
+        curr_dir = os.path.dirname(os.path.abspath(__file__))
+        options.root = os.path.join(curr_dir, '../include')
+
+    if options.search_dirs == [] and os.path.isdir(options.root):
+        options.search_dirs = [options.root]
+
+    options.root = os.path.abspath(options.root)
+    options.search_dirs = [os.path.abspath(p) for p in options.search_dirs]
+
+    if os.path.isdir(options.root):
+        roots = list_all_roots_under(options.root)
+    elif os.path.isfile(options.root):
+        roots = [options.root]
+    else:
+        raise RuntimeError('--root must name an existing file or directory')
+
+    graph = build_graph(roots, options)
+
+    # Eliminate certain kinds of "visual noise" headers, if asked for.
+    def should_keep(fname):
+        return all([
+            options.show_config_headers or not is_config_header(fname),
+            options.show_experimental_headers or not is_experimental_header(fname),
+            options.show_support_headers or not is_support_header(fname),
+        ])
+
+    for fname in list(graph.keys()):
+        if should_keep(fname):
+            graph[fname].includes = [h for h in graph[fname].includes if should_keep(h)]
+        else:
+            del graph[fname]
+
+    # Look for cycles.
+    no_cycles_detected = True
+    for fname, entry in graph.items():
+        for h in entry.includes:
+            if transitively_includes(graph, h, fname):
+                print('Cycle detected between %s and %s' % (fname, h))
+                no_cycles_detected = False
+    assert no_cycles_detected
+
+    print(get_graphviz(graph, options))
diff --git a/libcxx/utils/libcxx/graph.py b/libcxx/utils/libcxx/graph.py
deleted file mode 100644
--- a/libcxx/utils/libcxx/graph.py
+++ /dev/null
@@ -1,298 +0,0 @@
-#===----------------------------------------------------------------------===##
-#
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-#===----------------------------------------------------------------------===##
-
-import platform
-import os
-from collections import defaultdict
-import re
-import libcxx.util
-
-
-class DotEmitter(object):
-    def __init__(self, name):
-        self.name = name
-        self.node_strings = {}
-        self.edge_strings = []
-
-    def addNode(self, node):
-        res = str(node.id)
-        if len(node.attributes):
-            attr_strs = []
-            for k,v in node.attributes.iteritems():
-                attr_strs += ['%s="%s"' % (k, v)]
-            res += ' [ %s ]' % (', '.join(attr_strs))
-        res += ';'
-        assert node.id not in self.node_strings
-        self.node_strings[node.id] = res
-
-    def addEdge(self, n1, n2):
-        res = '%s -> %s;' % (n1.id, n2.id)
-        self.edge_strings += [res]
-
-    def node_key(self, n):
-        id = n.id
-        assert id.startswith('\w*\d+')
-
-    def emit(self):
-        node_definitions_list = []
-        sorted_keys = self.node_strings.keys()
-        sorted_keys.sort()
-        for k in sorted_keys:
-            node_definitions_list += [self.node_strings[k]]
-        node_definitions = '\n '.join(node_definitions_list)
-        edge_list = '\n '.join(self.edge_strings)
-        return '''
-digraph "{name}" {{
-  {node_definitions}
-  {edge_list}
-}}
-'''.format(name=self.name, node_definitions=node_definitions, edge_list=edge_list).strip()
-
-
-class DotReader(object):
-    def __init__(self):
-        self.graph = DirectedGraph(None)
-
-    def abortParse(self, msg="bad input"):
-        raise Exception(msg)
-
-    def parse(self, data):
-        lines = [l.strip() for l in data.splitlines() if l.strip()]
-        maxIdx = len(lines)
-        idx = 0
-        if not self.parseIntroducer(lines[idx]):
-            self.abortParse('failed to parse introducer')
-        idx += 1
-        while idx < maxIdx:
-            if self.parseNodeDefinition(lines[idx]) or self.parseEdgeDefinition(lines[idx]):
-                idx += 1
-                continue
-            else:
-                break
-        if idx == maxIdx or not self.parseCloser(lines[idx]):
-            self.abortParse("no closing } found")
-        return self.graph
-
-    def parseEdgeDefinition(self, l):
-        edge_re = re.compile('^\s*(\w+)\s+->\s+(\w+);\s*$')
-        m = edge_re.match(l)
-        if not m:
-            return False
-        n1 = m.group(1)
-        n2 = m.group(2)
-        self.graph.addEdge(n1, n2)
-        return True
-
-    def parseAttributes(self, raw_str):
-        attribute_re = re.compile('^\s*(\w+)="([^"]+)"')
-        parts = [l.strip() for l in raw_str.split(',') if l.strip()]
-        attribute_dict = {}
-        for a in parts:
-            m = attribute_re.match(a)
-            if not m:
-                self.abortParse('Bad attribute "%s"' % a)
-            attribute_dict[m.group(1)] = m.group(2)
-        return attribute_dict
-
-    def parseNodeDefinition(self, l):
-        node_definition_re = re.compile('^\s*(\w+)\s+\[([^\]]+)\]\s*;\s*$')
-        m = node_definition_re.match(l)
-        if not m:
-            return False
-        id = m.group(1)
-        attributes = self.parseAttributes(m.group(2))
-        n = Node(id, edges=[], attributes=attributes)
-        self.graph.addNode(n)
-        return True
-
-    def parseIntroducer(self, l):
-        introducer_re = re.compile('^\s*digraph "([^"]+)"\s+{\s*$')
-        m = introducer_re.match(l)
-        if not m:
-            return False
-        self.graph.setName(m.group(1))
-        return True
-
-    def parseCloser(self, l):
-        closer_re = re.compile('^\s*}\s*$')
-        m = closer_re.match(l)
-        if not m:
-            return False
-        return True
-class Node(object):
-    def __init__(self, id, edges=[], attributes={}):
-        self.id = id
-        self.edges = set(edges)
-        self.attributes = dict(attributes)
-
-    def addEdge(self, dest):
-        self.edges.add(dest)
-
-    def __eq__(self, another):
-        if isinstance(another, str):
-            return another == self.id
-        return hasattr(another, 'id') and self.id == another.id
-
-    def __hash__(self):
-        return hash(self.id)
-
-    def __str__(self):
-        return self.attributes["label"]
-
-    def __repr__(self):
-        return self.__str__()
-        res = self.id
-        if len(self.attributes):
-            attr = []
-            for k,v in self.attributes.iteritems():
-                attr += ['%s="%s"' % (k, v)]
-            res += ' [%s ]' % (', '.join(attr))
-        return res
-class DirectedGraph(object):
-    def __init__(self, name=None, nodes=None):
-        self.name = name
-        self.nodes = set() if nodes is None else set(nodes)
-
-    def setName(self, n):
-        self.name = n
-
-    def _getNode(self, n_or_id):
-        if isinstance(n_or_id, Node):
-            return n_or_id
-        return self.getNode(n_or_id)
-
-    def getNode(self, str_id):
-        assert isinstance(str_id, str) or isinstance(str_id, Node)
-        for s in self.nodes:
-            if s == str_id:
-                return s
-        return None
-
-    def getNodeByLabel(self, l):
-        found = None
-        for s in self.nodes:
-            if s.attributes['label'] == l:
-                assert found is None
-                found = s
-        return found
-
-    def addEdge(self, n1, n2):
-        n1 = self._getNode(n1)
-        n2 = self._getNode(n2)
-        assert n1 in self.nodes
-        assert n2 in self.nodes
-        n1.addEdge(n2)
-
-    def addNode(self, n):
-        self.nodes.add(n)
-
-    def removeNode(self, n):
-        n = self._getNode(n)
-        for other_n in self.nodes:
-            if other_n == n:
-                continue
-            new_edges = set()
-            for e in other_n.edges:
-                if e != n:
-                    new_edges.add(e)
-            other_n.edges = new_edges
-        self.nodes.remove(n)
-
-    def toDot(self):
-        dot = DotEmitter(self.name)
-        for n in self.nodes:
-            dot.addNode(n)
-            for ndest in n.edges:
-                dot.addEdge(n, ndest)
-        return dot.emit()
-
-    @staticmethod
-    def fromDot(str):
-        reader = DotReader()
-        graph = reader.parse(str)
-        return graph
-
-    @staticmethod
-    def fromDotFile(fname):
-        with open(fname, 'r') as f:
-            return DirectedGraph.fromDot(f.read())
-
-    def toDotFile(self, fname):
-        with open(fname, 'w') as f:
-            f.write(self.toDot())
-
-    def __repr__(self):
-        return self.toDot()
-class BFS(object):
-    def __init__(self, start):
-        self.visited = set()
-        self.to_visit = []
-        self.start = start
-
-    def __nonzero__(self):
-        return len(self.to_visit) != 0
-
-    def empty(self):
-        return len(self.to_visit) == 0
-
-    def push_back(self, node):
-        assert node not in self.visited
-        self.visited.add(node)
-        self.to_visit += [node]
-
-    def maybe_push_back(self, node):
-        if node in self.visited:
-            return
-        self.push_back(node)
-
-    def pop_front(self):
-        assert len(self.to_visit)
-        elem = self.to_visit[0]
-        del self.to_visit[0]
-        return elem
-
-    def seen(self, n):
-        return n in self.visited
-
-
-
-class CycleFinder(object):
-    def __init__(self, graph):
-        self.graph = graph
-
-    def findCycleForNode(self, n):
-        assert n in self.graph.nodes
-        all_paths = {}
-        all_cycles = []
-        bfs = BFS(n)
-        bfs.push_back(n)
-        all_paths[n] = [n]
-        while bfs:
-            n = bfs.pop_front()
-            assert n in all_paths
-            for e in n.edges:
-                en = self.graph.getNode(e)
-                if not bfs.seen(en):
-                    new_path = list(all_paths[n])
-                    new_path.extend([en])
-                    all_paths[en] = new_path
-                    bfs.push_back(en)
-                if en == bfs.start:
-                    all_cycles += [all_paths[n]]
-        return all_cycles
-
-    def findCyclesInGraph(self):
-        all_cycles = []
-        for n in self.graph.nodes:
-            cycle = self.findCycleForNode(n)
-            if cycle:
-                all_cycles += [(n, cycle)]
-        return all_cycles
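
Note (illustrative, not part of the patch): the rewritten script uses one primitive, a transitive closure over per-header include lists, both to prune edges that are transitively included anyway and to detect include cycles. The sketch below models that check on a toy graph; the function names and header names here are hypothetical, while the patch itself implements this via transitive_closure_of_includes() and transitively_includes() above.

# Illustrative sketch only -- not part of the patch above.
import itertools

def transitive_closure(includes, start):
    # Every header reachable from `start` by following direct-include edges.
    visited = set()
    stack = [start]
    while stack:
        h = stack.pop()
        if h not in visited:
            visited.add(h)
            stack.extend(includes[h])
    return visited

def transitively_includes(includes, a, b):
    return a != b and b in transitive_closure(includes, a)

if __name__ == '__main__':
    # Hypothetical toy graph: header -> list of directly included headers.
    toy = {
        'vector': ['__config', 'type_traits'],
        'type_traits': ['__config'],
        '__config': [],
    }
    assert transitively_includes(toy, 'vector', '__config')
    # A cycle exists exactly when two distinct headers transitively include
    # each other; this toy graph has none.
    assert not any(transitively_includes(toy, a, b) and transitively_includes(toy, b, a)
                   for a, b in itertools.permutations(toy, 2))
    print('toy graph is acyclic')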