Index: ModulesPerf/README.md
===================================================================
--- /dev/null
+++ ModulesPerf/README.md
@@ -0,0 +1,20 @@
+A test suite to collect stats on modules' performance.
+
+# How to run the test suite
+
+For example,
+
+    mkdir sandbox
+    ./run-tests.py --workdir ./sandbox --binary path/to/clang
+
+
+# How to view stats
+
+The collected data is not easy to digest in its raw form. To view it in a
+more manageable form use
+
+    ./view-stats.py path/to/tests/directory
+
+To compare stats for different compiler builds
+
+    ./view-stats.py path/to/baseline path/to/test/after/making/some/change
Index: ModulesPerf/run-tests.py
===================================================================
--- /dev/null
+++ ModulesPerf/run-tests.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+
+import argparse
+import datetime
+import os
+import shutil
+import subprocess
+
+
+class BaseInputGenerator(object):
+    def __init__(self, inputs_dir, modules_count):
+        assert modules_count >= 0, "Modules count cannot be negative"
+        self._inputs_dir = inputs_dir
+        self._modules_count = modules_count
+        self.frameworks_path = os.path.join(self._inputs_dir, "Frameworks")
+        self.impl_file_path = os.path.join(self._inputs_dir, "test.m")
+
+    def _module_name(self, module_index):
+        return f"Mod{module_index}"
+
+    def _module_import(self, module_name):
+        return f"#import <{module_name}/{module_name}.h>\n"
+
+    def _module_header_path(self, module_name):
+        return os.path.join(self.frameworks_path, f"{module_name}.framework", "Headers", f"{module_name}.h")
+
+    def _write_file(self, file_path, content, is_appending=False):
+        with open(file_path, "at" if is_appending else "wt") as f:
+            f.write(content)
+
+    def _generate_framework(self, framework_name, content):
+        framework_dir = os.path.join(self.frameworks_path, f"{framework_name}.framework")
+        os.mkdir(framework_dir)
+        os.mkdir(os.path.join(framework_dir, "Headers"))
+        self._write_file(os.path.join(framework_dir, "Headers", f"{framework_name}.h"), content)
+        os.mkdir(os.path.join(framework_dir, "Modules"))
+        self._write_file(os.path.join(framework_dir, "Modules", "module.modulemap"), """
+            framework module {name} {{
+                umbrella header "{name}.h"
+                export *
+                module * {{ export * }}
+            }}
+            """.format(name=framework_name))
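+
+
+# For illustration, _generate_framework("Mod0", ...) creates this layout:
+#   Frameworks/Mod0.framework/Headers/Mod0.h           (umbrella header)
+#   Frameworks/Mod0.framework/Modules/module.modulemap (module map)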
+
+
+class LinearDependencyInputGenerator(BaseInputGenerator):
+    """Represents a linear dependency topology.
+
+    Mod0 <- Mod1 <- ... <- ModN <- test.m.
+    """
+
+    def generate_files(self):
+        last_module_name = None
+        if self._modules_count > 0:
+            # Generate frameworks.
+            os.mkdir(self.frameworks_path)
+            for i in range(self._modules_count):
+                content = self._module_import(last_module_name) if last_module_name is not None else "// First module\n"
+                module_name = self._module_name(i)
+                self._generate_framework(module_name, content)
+                last_module_name = module_name
+        # Generate implementation file.
+        self._write_file(self.impl_file_path,
+                         self._module_import(last_module_name)
+                         if last_module_name is not None
+                         else "// no dependencies")
+
+    def update_root_module(self):
+        assert self._modules_count > 0
+        module_name = self._module_name(self._modules_count-1)
+        self._write_file(self._module_header_path(module_name), "\n// updated\n", is_appending=True)
+
+    def update_leaf_module(self):
+        assert self._modules_count > 0
+        module_name = self._module_name(0)
+        self._write_file(self._module_header_path(module_name), "\n// updated\n", is_appending=True)
+
+
+class StarDependencyInputGenerator(BaseInputGenerator):
+    """Represents a star dependency topology.
+
+    Mod0 <- Mod1, Mod0 <- Mod2, ..., Mod0 <- ModN.
+
+    It might be a questionable decision, but in the constructor
+    `modules_count` means the number of edge modules. So even if
+    `modules_count == 0`, we still have one central module.
+    """
+
+    def generate_files(self):
+        # Generate frameworks.
+        os.mkdir(self.frameworks_path)
+        central_module_name = self._module_name(0)
+        central_module_import = self._module_import(central_module_name)
+        self._generate_framework(central_module_name, "// Central module\n")
+        for i in range(1, self._modules_count+1):
+            self._generate_framework(self._module_name(i), central_module_import)
+        # Implementation file is generated by subclasses.
+
+    def update_central_module(self):
+        assert self._modules_count > 0
+        module_name = self._module_name(0)
+        self._write_file(self._module_header_path(module_name), "\n// updated\n", is_appending=True)
+
+    def update_edge_module(self):
+        assert self._modules_count > 0
+        module_name = self._module_name(self._modules_count)
+        self._write_file(self._module_header_path(module_name), "\n// updated\n", is_appending=True)
+
+
+class StarDependencyOnCenterInputGenerator(StarDependencyInputGenerator):
+    def generate_files(self):
+        super(StarDependencyOnCenterInputGenerator, self).generate_files()
+        self._write_file(self.impl_file_path, self._module_import(self._module_name(0)))
+
+
+class StarDependencyOnEdgeInputGenerator(StarDependencyInputGenerator):
+    def __init__(self, inputs_dir, modules_count):
+        assert modules_count >= 1, "Need at least 1 edge module to depend on it"
+        super(StarDependencyOnEdgeInputGenerator, self).__init__(inputs_dir, modules_count)
+
+    def generate_files(self):
+        super(StarDependencyOnEdgeInputGenerator, self).generate_files()
+        self._write_file(self.impl_file_path, self._module_import(self._module_name(self._modules_count)))
+
+
+class TestExecutor(object):
+    def __init__(self, tests_root_dir, clang_path):
+        self._tests_root_dir = tests_root_dir
+        self._clang_path = clang_path
+
+    def _compiler_command_fixed_input_size(self, test_dir, inputs_dir, input_generator):
+        return [self._clang_path, "-fsyntax-only",
+                "-fmodules", "-fmodules-cache-path=" + os.path.join(test_dir, "ModuleCache.noindex"),
+                "-fno-integrated-cc1",
+                "-save-stats=cwd",
+                "-F", input_generator.frameworks_path,
+                input_generator.impl_file_path]
+
+    def _compiler_command_variable_input_size(self, test_dir, inputs_dir, input_generator):
+        return [self._clang_path, "-fsyntax-only",
+                "-fmodules", "-fmodules-cache-path=" + os.path.join(inputs_dir, "ModuleCache.noindex"),
+                "-fno-integrated-cc1",
+                "-save-stats=cwd",
+                "-F", input_generator.frameworks_path,
+                input_generator.impl_file_path]
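+
+    # Both helpers build an invocation along these lines (paths illustrative):
+    #   clang -fsyntax-only -fmodules \
+    #         -fmodules-cache-path=<dir>/ModuleCache.noindex \
+    #         -fno-integrated-cc1 -save-stats=cwd \
+    #         -F <inputs_dir>/Frameworks <inputs_dir>/test.m
+    # The only difference is where the module cache lives: in the test
+    # directory for the fixed-size test, in the inputs directory for the
+    # variable-size test.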
+
+    def _test_fixed_input_size(self, test_dir, input_generator_class, extra_stats_on_input_change):
+        # Generate test data.
+        inputs_dir = os.path.join(test_dir, "Input")
+        os.mkdir(inputs_dir)
+        input_generator = input_generator_class(inputs_dir, modules_count=5)
+        input_generator.generate_files()
+
+        # Run clang and collect stats.
+        command = self._compiler_command_fixed_input_size(test_dir, inputs_dir, input_generator)
+        subprocess.check_call(command)
+        shutil.copy(os.path.join(test_dir, "test.stats"), os.path.join(test_dir, "clean_build.stats"))
+
+        subprocess.check_call(command)
+        shutil.copy(os.path.join(test_dir, "test.stats"), os.path.join(test_dir, "noop_build.stats"))
+
+        for (generator_method_name, stats_file_name) in extra_stats_on_input_change:
+            getattr(input_generator, generator_method_name)()
+            subprocess.check_call(command)
+            shutil.copy(os.path.join(test_dir, "test.stats"), os.path.join(test_dir, f"{stats_file_name}.stats"))
+
+    def _test_variable_input_size(self, test_dir, input_generator_class, extra_stats_on_input_change):
+        for modules_count in range(1, 9):
+            inputs_dir = os.path.join(test_dir, f"InputSize{modules_count}")
+            os.mkdir(inputs_dir)
+            input_generator = input_generator_class(inputs_dir, modules_count)
+            input_generator.generate_files()
+
+            # Run clang and collect stats.
+            command = self._compiler_command_variable_input_size(test_dir, inputs_dir, input_generator)
+            subprocess.check_call(command)
+            shutil.copy(os.path.join(test_dir, "test.stats"), os.path.join(test_dir, f"clean_build_size{modules_count}.stats"))
+
+            subprocess.check_call(command)
+            shutil.copy(os.path.join(test_dir, "test.stats"), os.path.join(test_dir, f"noop_build_size{modules_count}.stats"))
+
+            for (generator_method_name, stats_file_name) in extra_stats_on_input_change:
+                getattr(input_generator, generator_method_name)()
+                subprocess.check_call(command)
+                shutil.copy(os.path.join(test_dir, "test.stats"), os.path.join(test_dir, f"{stats_file_name}_size{modules_count}.stats"))
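+
+    # After a run, a test directory contains clean_build.stats,
+    # noop_build.stats and one <name>.stats file per entry in
+    # `extra_stats_on_input_change`; the variable/ subdirectory contains the
+    # same set of files with a _size<N> suffix.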
+
+    def test_module_topology(self, test_name, input_generator_class, extra_stats_on_input_change=tuple()):
+        test_dir = os.path.join(self._tests_root_dir, test_name)
+        os.mkdir(test_dir)
+        os.chdir(test_dir)
+        self._test_fixed_input_size(test_dir, input_generator_class, extra_stats_on_input_change)
+        variable_test_dir = os.path.join(test_dir, "variable")
+        os.mkdir(variable_test_dir)
+        os.chdir(variable_test_dir)
+        self._test_variable_input_size(variable_test_dir, input_generator_class, extra_stats_on_input_change)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--binary", required=True,
+                        help="path to Clang binary")
+    parser.add_argument("--workdir", required=True,
+                        help="location where to store test results and temporary files")
+    args = parser.parse_args()
+
+    test_run_dir = os.path.join(args.workdir, datetime.datetime.now().strftime("test-%Y-%m-%d_%H-%M-%S"))
+    test_run_dir = os.path.abspath(test_run_dir)
+    os.makedirs(test_run_dir)
+    print(f"Test results can be found in {test_run_dir}")
+    clang_path = os.path.abspath(args.binary)
+
+    test_executor = TestExecutor(test_run_dir, clang_path)
+
+    # Collect perf data for a linear dependency chain of modules.
+    # For incremental rebuilds test updating a root of the chain and a leaf.
+    test_executor.test_module_topology("linear_dependency", LinearDependencyInputGenerator, (
+        ("update_root_module", "incremental_update_root"),
+        ("update_leaf_module", "incremental_update_leaf"),
+    ))
+
+    # Collect perf data for a star dependency topology.
+    test_executor.test_module_topology("star_dependency_on_center", StarDependencyOnCenterInputGenerator, (
+        ("update_central_module", "incremental_update_center"),
+        ("update_edge_module", "incremental_update_edge"),
+    ))
+
+    test_executor.test_module_topology("star_dependency_on_edge", StarDependencyOnEdgeInputGenerator, (
+        ("update_central_module", "incremental_update_center"),
+        ("update_edge_module", "incremental_update_edge"),
+    ))
Index: ModulesPerf/view-stats.py
===================================================================
--- /dev/null
+++ ModulesPerf/view-stats.py
@@ -0,0 +1,483 @@
+#!/usr/bin/env python3
+
+import argparse
+import collections
+import glob
+import http.server
+import json
+import os
+
+
+SCRIPT = """
+function collectMetricData(metrics, buildKindNames) {
+    // Collect metric names.
+    var metricNames = new Set()
+    Object.values(metrics.fixed_input_size).forEach(metricsDict => {
+        Object.keys(metricsDict).forEach(metricName => {
+            metricNames.add(metricName)
+        })
+    })
+    metricNames = Array.from(metricNames)
+    metricNames.sort()
+
+    // Collect build kinds. First take values from the provided
+    // `buildKindNames`, then sort the remainder.
+    var allBuildKinds = new Set(Object.keys(metrics.fixed_input_size))
+    var buildKinds = []
+    buildKindNames.forEach(buildKindName => {
+        if (allBuildKinds.has(buildKindName)) {
+            buildKinds.push(buildKindName)
+            allBuildKinds.delete(buildKindName)
+        }
+    })
+    if (allBuildKinds.size > 0) {
+        var remaining = Array.from(allBuildKinds)
+        remaining.sort()
+        buildKinds = buildKinds.concat(remaining)
+    }
+
+    // Collect data to be displayed in a table. It is a dictionary of dictionaries.
+    // First we index by a metric name, then by build kind.
+    var fixedData = new Object()
+    metricNames.forEach(name => { fixedData[name] = new Object() })
+    Object.entries(metrics.fixed_input_size).forEach(entry => {
+        let buildKind = entry[0]
+        let metricDict = entry[1]
+        Object.entries(metricDict).forEach(metricEntry => {
+            fixedData[metricEntry[0]][buildKind] = metricEntry[1]
+        })
+    })
+
+    var variableData = new Object()
+    metricNames.forEach(name => {
+        variableData[name] = new Object()
+        buildKinds.forEach(buildKind => { variableData[name][buildKind] = new Array() })
+    })
+    Object.entries(metrics.variable_input_size).forEach(entry => {
+        let buildKind = entry[0]
+        let variableDict = entry[1]
+        Object.entries(variableDict).forEach(variableEntry => {
+            let xValue = variableEntry[0]
+            let metricDict = variableEntry[1]
+            Object.entries(metricDict).forEach(metricEntry => {
+                variableData[metricEntry[0]][buildKind].push({x: xValue, y: metricEntry[1]})
+            })
+        })
+    })
+    metricNames.forEach(metricName => {
+        buildKinds.forEach(buildKind => {
+            variableData[metricName][buildKind].sort((a, b) => d3.ascending(a.x, b.x))
+        })
+    })
+
+    return {
+        metricNames: metricNames,
+        buildKinds: buildKinds,
+        fixedSizeData: [fixedData],
+        variableSizeData: [variableData],
+    }
+}
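+
+// The object returned above has the following shape (values illustrative);
+// note that x values are the size keys from the JSON and hence strings:
+//   {metricNames: ["SomeCounter", ...],
+//    buildKinds: ["clean_build", "noop_build", ...],
+//    fixedSizeData: [{SomeCounter: {clean_build: 42, ...}}],
+//    variableSizeData: [{SomeCounter: {clean_build: [{x: "1", y: 42}, ...]}}]}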
+
+function mergeSorted(leftArray, rightArray) {
+    var result = new Array()
+    var left = 0, leftLength = leftArray.length,
+        right = 0, rightLength = rightArray.length
+    while ((left < leftLength) && (right < rightLength)) {
+        if (leftArray[left] < rightArray[right]) {
+            result.push(leftArray[left])
+            ++left
+        } else if (leftArray[left] > rightArray[right]) {
+            result.push(rightArray[right])
+            ++right
+        } else {
+            result.push(leftArray[left])
+            ++left
+            ++right
+        }
+    }
+    if (left < leftLength) {
+        result = result.concat(leftArray.slice(left))
+    }
+    if (right < rightLength) {
+        result = result.concat(rightArray.slice(right))
+    }
+    return result
+}
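+
+// For example, mergeSorted(["a", "c"], ["b", "c"]) yields ["a", "b", "c"]:
+// elements present in both inputs are kept once, so merged metricNames and
+// buildKinds lists stay sorted and free of duplicates.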
+
+function mergeMetricData(leftMetricData, rightMetricData) {
+    return {
+        metricNames: mergeSorted(leftMetricData.metricNames, rightMetricData.metricNames),
+        buildKinds: mergeSorted(leftMetricData.buildKinds, rightMetricData.buildKinds),
+        fixedSizeData: leftMetricData.fixedSizeData.concat(rightMetricData.fixedSizeData),
+        variableSizeData: leftMetricData.variableSizeData.concat(rightMetricData.variableSizeData),
+    }
+}
+
+function createFixedSizeMetricsTable(data) {
+    let table = document.createElement("table")
+    table.classList.add("stats-numbers")
+    {
+        let head = document.createElement("thead")
+        let row = document.createElement("tr")
+        row.appendChild(document.createElement("th"))
+        data.buildKinds.forEach(name => {
+            let cell = document.createElement("th")
+            cell.appendChild(document.createTextNode(name))
+            row.appendChild(cell)
+        })
+        head.appendChild(row)
+        table.appendChild(head)
+    }
+    {
+        let body = document.createElement("tbody")
+        data.metricNames.forEach(metricName => {
+            let row = document.createElement("tr")
+            let cell = document.createElement("th")
+            cell.appendChild(document.createTextNode(metricName))
+            row.appendChild(cell)
+            data.buildKinds.forEach(buildKind => {
+                let cell = document.createElement("td")
+                let value = data.fixedSizeData[0][metricName][buildKind]
+                cell.appendChild(document.createTextNode(value == null ? "–" : value))
+                row.appendChild(cell)
+            })
+            body.appendChild(row)
+        })
+        table.appendChild(body)
+    }
+    return table
+}
+
+function createFixedSizeComparisonMetrics(data) {
+    let result = document.createElement("div")
+    data.buildKinds.forEach(buildKind => {
+        let table = document.createElement("table")
+        table.classList.add("stats-numbers")
+        let caption = document.createElement("caption")
+        caption.appendChild(document.createTextNode(buildKind))
+        table.appendChild(caption)
+        let body = document.createElement("tbody")
+        data.metricNames.forEach(metricName => {
+            let row = document.createElement("tr")
+            let cell = document.createElement("th")
+            cell.appendChild(document.createTextNode(metricName))
+            row.appendChild(cell)
+            data.fixedSizeData.forEach((dataDict, i) => {
+                let cell = document.createElement("td")
+                let value = dataDict[metricName][buildKind]
+                cell.appendChild(document.createTextNode(value == null ? "–" : value))
+                row.appendChild(cell)
+                if (i !== 0) {
+                    let diffCell = document.createElement("td")
+                    let baselineValue = data.fixedSizeData[0][metricName][buildKind]
+                    // For difference purposes treat missing values as 0.
+                    let difference = (value || 0) - (baselineValue || 0)
+                    if (difference != 0) {
+                        diffCell.appendChild(document.createTextNode(difference))
+                    }
+                    row.appendChild(diffCell)
+                }
+            })
+            body.appendChild(row)
+        })
+        table.appendChild(body)
+        result.appendChild(table)
+    })
+    return result
+}
+
+function createMetricPlot(dataSets, xExtent, yExtent) {
+    // Collect plot settings together to simplify appearance tweaking.
+    const plotSettings = {
+        width: 250,
+        height: 150,
+        margin: {top: 10, right: 10, bottom: 30, left: 40},
+    }
+
+    const x = d3.scaleLinear()
+        .domain(xExtent).nice()
+        .range([plotSettings.margin.left, plotSettings.width - plotSettings.margin.right])
+
+    const y = d3.scaleLinear()
+        .domain(yExtent).nice()
+        .range([plotSettings.height - plotSettings.margin.bottom, plotSettings.margin.top])
+
+    const svg = d3.create("svg")
+        .attr("width", plotSettings.width)
+        .attr("height", plotSettings.height)
+
+    svg.append("g")
+        .attr("transform", `translate(0,${plotSettings.height - plotSettings.margin.bottom})`)
+        .call(d3.axisBottom(x).ticks(3))
+
+    svg.append("g")
+        .attr("transform", `translate(${plotSettings.margin.left},0)`)
+        .call(d3.axisLeft(y).ticks(3))
+
+    const line = d3.line()
+        .x(d => x(d.x))
+        .y(d => y(d.y))
+
+    let colors = d3.scaleOrdinal(d3.schemeCategory10)
+    dataSets.forEach((dataSet, i) => {
+        if (dataSet.length == 0) {
+            return
+        }
+        svg.append("path")
+            .datum(dataSet)
+            .attr("fill", "none")
+            .attr("stroke", colors(i))
+            .attr("stroke-width", 1)
+            .attr("stroke-linejoin", "round")
+            .attr("stroke-linecap", "round")
+            .attr("d", line)
+    })
+    return svg.node()
+}
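+
+// Note: line colors are assigned from d3.schemeCategory10, so at most ten
+// data sets (compared test runs) get visually distinct lines in one plot.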
+
+function createVariableSizeMetricPlots(data) {
+    let table = document.createElement("table")
+    table.classList.add("stats-plots")
+    // Table head DOM manipulation.
+    {
+        let head = document.createElement("thead")
+        let row = document.createElement("tr")
+        row.appendChild(document.createElement("th"))
+        data.buildKinds.forEach(name => {
+            let cell = document.createElement("th")
+            cell.appendChild(document.createTextNode(name))
+            row.appendChild(cell)
+        })
+        head.appendChild(row)
+        table.appendChild(head)
+    }
+    // Share x-domain across all plots.
+    let xMin = d3.min(data.variableSizeData, completeDict => {
+        return d3.min(Object.values(completeDict), dataSeriesDict => {
+            return d3.min(Object.values(dataSeriesDict), data => d3.min(data, d => d.x))
+        })
+    })
+    let xMax = d3.max(data.variableSizeData, completeDict => {
+        return d3.max(Object.values(completeDict), dataSeriesDict => {
+            return d3.max(Object.values(dataSeriesDict), data => d3.max(data, d => d.x))
+        })
+    })
+    // Table body DOM manipulation.
+    {
+        let body = document.createElement("tbody")
+        data.metricNames.forEach(metricName => {
+            // Share y-domain across plots in the same row.
+            let yMin = d3.min(data.variableSizeData, completeDict => {
+                return d3.min(Object.values(completeDict[metricName]), data => {
+                    return d3.min(data, d => d.y)
+                })
+            })
+            let yMax = d3.max(data.variableSizeData, completeDict => {
+                return d3.max(Object.values(completeDict[metricName]), data => {
+                    return d3.max(data, d => d.y)
+                })
+            })
+
+            let row = document.createElement("tr")
+            let cell = document.createElement("th")
+            cell.appendChild(document.createTextNode(metricName))
+            row.appendChild(cell)
+            data.buildKinds.forEach(buildKind => {
+                let cell = document.createElement("td")
+                let dataSeries = data.variableSizeData.map(completeDict => completeDict[metricName][buildKind])
+                if (dataSeries.some(data => data.length > 0)) {
+                    cell.appendChild(createMetricPlot(dataSeries, [d3.min([xMin, 0]), xMax], [d3.min([yMin, 0]), yMax]))
+                }
+                row.appendChild(cell)
+            })
+            body.appendChild(row)
+        })
+        table.appendChild(body)
+    }
+    return table
+}
+
+function displayStats(statsDataRuns, testName, placeholderNodeId) {
+    const STABLE_BUILD_KINDS = ["clean_build", "noop_build"]
+    var metricData = collectMetricData(statsDataRuns[0][testName], STABLE_BUILD_KINDS)
+    for (let i = 1; i < statsDataRuns.length; i++) {
+        metricData = mergeMetricData(metricData,
+                                     collectMetricData(statsDataRuns[i][testName], STABLE_BUILD_KINDS))
+    }
+    let fixedSizeMetrics = metricData.fixedSizeData.length > 1 ?
+        createFixedSizeComparisonMetrics(metricData) :
+        createFixedSizeMetricsTable(metricData)
+    let variableSizePlots = createVariableSizeMetricPlots(metricData)
+
+    let placeholderNode = document.getElementById(placeholderNodeId)
+    placeholderNode.appendChild(fixedSizeMetrics)
+    placeholderNode.appendChild(variableSizePlots)
+}
+
+document.addEventListener("DOMContentLoaded", event => {
+    displayStats(stats, "linear_dependency", "linear-dependency-placeholder")
+    displayStats(stats, "star_dependency_on_center", "star-dependency-on-center-placeholder")
+    displayStats(stats, "star_dependency_on_edge", "star-dependency-on-edge-placeholder")
+})
+"""
+
+CSS = """
+body {
+    font-family: Helvetica, Arial, sans-serif;
+    margin: 0;
+    padding: 0;
+}
+
+.content {
+    margin: 0 20px;
+}
+
+table {
+    border-spacing: 0;
+}
+
+.stats-numbers tbody {
+    line-height: 1.3;
+}
+.stats-numbers td {
+    text-align: right; /* numbers should be aligned to the right */
+    padding-right: 10px;
+}
+.stats-numbers thead th {
+    padding-right: 5px;
+}
+.stats-numbers tbody th {
+    text-align: left;
+    font-weight: normal;
+}
+/*
+.stats-numbers tbody tr:hover {
+    background-color: #dee5ff;
+}
+*/
+
+.stats-numbers + .stats-plots, .stats-numbers + .stats-numbers {
+    margin-top: 10px;
+}
+.stats-plots tbody th {
+    text-align: left;
+    font-weight: normal;
+}
+"""
+
+PAGE = """<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8"/>
+<title>Module stats</title>
+<script src="https://d3js.org/d3.v5.min.js"></script>
+<script>var stats = {stats_json}</script>
+<script>{script}</script>
+<style>{css}</style>
+</head>
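+<!-- One section per module dependency topology. displayStats() fills each
+     placeholder div with a table of fixed-input-size metrics and a grid of
+     plots for the variable-input-size runs. -->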
+<body>
+<div class="content">
+
+<h1>Linear dependency chain</h1>
+
+<p>The dependency graph between modules looks like</p>
+
+<pre>
+Mod0 <- Mod1 <- Mod2 <- ... <- ModN <- test.m
+</pre>
+
+<p>We measure the cost of an incremental build when updating a leaf module
+(Mod0) and a root module (ModN).</p>
+
+<div id="linear-dependency-placeholder"></div>
+
+<h1>Star dependency topology. Implementation depends on center</h1>
+
+<p>The dependency graph between modules looks like</p>
+
+<pre>
+Mod0 <- Mod1
+Mod0 <- Mod2
+...
+Mod0 <- ModN
+Mod0 <- test.m
+</pre>
+
+<p>We measure the cost of an incremental build when updating a central module
+(Mod0) and an edge module (ModN).</p>
+
+<div id="star-dependency-on-center-placeholder"></div>
+
+<h1>Star dependency topology. Implementation depends on edge</h1>
+
+<p>The dependency graph between modules looks like</p>
+
+<pre>
+Mod0 <- Mod1
+Mod0 <- Mod2
+...
+Mod0 <- ModN
+        ModN <- test.m
+</pre>
+
+<p>We measure the cost of an incremental build when updating a central module
+(Mod0) and an edge module (ModN).</p>
+
+<div id="star-dependency-on-edge-placeholder"></div>
+
+</div>
+</body>
+</html>
+"""
+
+
+class RequestHandler(http.server.BaseHTTPRequestHandler):
+    def do_GET(self):
+        self.send_response(200)
+        self.send_header("Content-type", "text/html")
+        self.end_headers()
+        self.wfile.write(PAGE.format(
+            script=SCRIPT, css=CSS,
+            stats_json=json.dumps(self.server.stats),
+        ).encode())
+
+
+def read_stats_files(files_root_dir):
+    fixed_stats = {}
+    fixed_test_files = glob.glob(os.path.join(files_root_dir, "*.stats"))
+    assert len(fixed_test_files) > 0, "Expect some test files"
+    for file_path in fixed_test_files:
+        basename = os.path.basename(file_path)
+        if basename == "test.stats":
+            continue
+        mode_name, _ = os.path.splitext(basename)
+        with open(file_path, "rt") as f:
+            fixed_stats[mode_name] = json.load(f)
+
+    variable_stats = collections.defaultdict(dict)
+    SIZE_LABEL = "_size"
+    variable_test_files = glob.glob(os.path.join(files_root_dir, "variable", f"*{SIZE_LABEL}*.stats"))
+    assert len(variable_test_files) > 0, "Expect some test files"
+    for file_path in variable_test_files:
+        basename = os.path.basename(file_path)
+        name, _ = os.path.splitext(basename)
+        pos = name.rfind(SIZE_LABEL)
+        mode_name = name[:pos]
+        size = int(name[pos+len(SIZE_LABEL):])
+        with open(file_path, "rt") as f:
+            variable_stats[mode_name][size] = json.load(f)
+    return {"fixed_input_size": fixed_stats, "variable_input_size": variable_stats}
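+
+# Note: each *.stats file written by clang's -save-stats is a JSON dictionary
+# mapping counter names to values; exact names vary per clang version, e.g.
+# (illustrative only): {"file-search.NumFrameworkLookups": 3}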
+
+def load_stats(test_run_locations):
+    assert len(test_run_locations) > 0, "Need at least one test run location"
+    result = []
+    for test_run_location in test_run_locations:
+        stats = {}
+        with os.scandir(test_run_location) as dir_iter:
+            for test_dir in dir_iter:
+                if not test_dir.is_dir():
+                    continue
+                stats[test_dir.name] = read_stats_files(test_dir.path)
+        result.append(stats)
+    return result
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("stats_directories", metavar="stats-directory", nargs="+",
+                        help="path to the directory with different stats")
+    args = parser.parse_args()
+
+    server_address = ('', 8000)
+    httpd = http.server.HTTPServer(server_address, RequestHandler)
+    httpd.stats = load_stats(args.stats_directories)
+    print(f"Running server on 127.0.0.1:{server_address[1]}...")
+    httpd.serve_forever()