diff --git a/clang/utils/analyzer/SATest.py b/clang/utils/analyzer/SATest.py
--- a/clang/utils/analyzer/SATest.py
+++ b/clang/utils/analyzer/SATest.py
@@ -34,29 +34,10 @@

 def build(parser, args):
     import SATestBuild
-    from ProjectMap import ProjectMap

     SATestBuild.VERBOSE = args.verbose

-    project_map = ProjectMap()
-    projects = project_map.projects
-
-    if args.projects:
-        projects_arg = args.projects.split(",")
-        available_projects = [project.name
-                              for project in projects]
-
-        # validate that given projects are present in the project map file
-        for manual_project in projects_arg:
-            if manual_project not in available_projects:
-                parser.error("Project '{project}' is not found in "
-                             "the project map file. Available projects are "
-                             "{all}.".format(project=manual_project,
-                                             all=available_projects))
-
-        projects = [project.with_fields(enabled=project.name in projects_arg)
-                    for project in projects]
-
+    projects = get_projects(parser, args.projects)
     tester = SATestBuild.RegressionTester(args.jobs,
                                           projects,
                                           args.override_compiler,
@@ -100,6 +81,44 @@
         SATestUpdateDiffs.update_reference_results(project)


+def benchmark(parser, args):
+    from SATestBenchmark import Benchmark
+
+    projects = get_projects(parser, args.projects)
+    benchmark = Benchmark(projects, args.iterations, args.output)
+    benchmark.run()
+
+
+def benchmark_compare(parser, args):
+    import SATestBenchmark
+    SATestBenchmark.compare(args.old, args.new, args.output)
+
+
+def get_projects(parser, projects_str):
+    from ProjectMap import ProjectMap
+
+    project_map = ProjectMap()
+    projects = project_map.projects
+
+    if projects_str:
+        projects_arg = projects_str.split(",")
+        available_projects = [project.name
+                              for project in projects]
+
+        # validate that given projects are present in the project map file
+        for manual_project in projects_arg:
+            if manual_project not in available_projects:
+                parser.error("Project '{project}' is not found in "
+                             "the project map file. Available projects are "
+                             "{all}.".format(project=manual_project,
+                                             all=available_projects))
+
+        projects = [project.with_fields(enabled=project.name in projects_arg)
+                    for project in projects]
+
+    return projects
+
+
 def docker(parser, args):
     if len(args.rest) > 0:
         if args.rest[0] != "--":
@@ -284,6 +303,36 @@
                              "to the docker's entrypoint.")
     dock_parser.set_defaults(func=docker)

+    # benchmark subcommand
+    bench_parser = subparsers.add_parser(
+        "benchmark",
+        help="Run benchmarks by building a set of projects multiple times.")
+
+    bench_parser.add_argument("-i", "--iterations", action="store",
+                              type=int, default=20,
+                              help="Number of iterations for building each "
+                              "project.")
+    bench_parser.add_argument("-o", "--output", action="store",
+                              default="benchmark.csv",
+                              help="Output csv file for the benchmark results")
+    bench_parser.add_argument("--projects", action="store", default="",
+                              help="Comma-separated list of projects to test")
+    bench_parser.set_defaults(func=benchmark)
+
+    bench_subparsers = bench_parser.add_subparsers()
+    bench_compare_parser = bench_subparsers.add_parser(
+        "compare",
+        help="Compare benchmark runs.")
+    bench_compare_parser.add_argument("--old", action="store", required=True,
+                                      help="Benchmark reference results to "
+                                      "compare against.")
+    bench_compare_parser.add_argument("--new", action="store", required=True,
+                                      help="New benchmark results to check.")
+    bench_compare_parser.add_argument("-o", "--output",
+                                      action="store", required=True,
+                                      help="Output file for plots.")
+    bench_compare_parser.set_defaults(func=benchmark_compare)
+
     args = parser.parse_args()
     args.func(parser, args)
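
With the parsers above, a benchmarking session could look roughly like this (a sketch only: the project names are placeholders that must exist in the project map, SATest.py is assumed to be invoked the same way as the existing build/compare subcommands, and the defaults are -i 20 and -o benchmark.csv):

    # measure a baseline, 10 iterations per project
    ./SATest.py benchmark --projects tmux,redis -i 10 -o baseline.csv
    # ...rebuild clang with the analyzer change under test, then re-measure...
    ./SATest.py benchmark --projects tmux,redis -i 10 -o patched.csv
    # box plots of normalized time and memory, old vs. new, per project
    ./SATest.py benchmark compare --old baseline.csv --new patched.csv -o comparison.png
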
diff --git a/clang/utils/analyzer/SATestBenchmark.py b/clang/utils/analyzer/SATestBenchmark.py
new file mode 100644
--- /dev/null
+++ b/clang/utils/analyzer/SATestBenchmark.py
@@ -0,0 +1,158 @@
+"""
+Static Analyzer qualification infrastructure.
+
+This source file contains all the functionality related to benchmarking
+the analyzer on a set of projects. Right now, this includes measuring
+execution time and peak memory usage. Benchmark runs analysis on every
+project multiple times to get a better picture of the distribution
+of measured values.
+
+Additionally, this file includes a comparison routine for two benchmarking
+results that plots them together on one chart.
+"""
+
+import SATestUtils as utils
+from SATestBuild import ProjectTester, stdout, TestInfo
+from ProjectMap import ProjectInfo
+
+import pandas as pd
+from typing import List, Tuple
+
+
+INDEX_COLUMN = "index"
+
+
+def _save(data: pd.DataFrame, file_path: str):
+    data.to_csv(file_path, index_label=INDEX_COLUMN)
+
+
+def _load(file_path: str) -> pd.DataFrame:
+    return pd.read_csv(file_path, index_col=INDEX_COLUMN)
+
+
+class Benchmark:
+    """
+    Benchmark class encapsulates one functionality: it runs the analysis
+    multiple times for the given set of projects and stores results in the
+    specified file.
+    """
+    def __init__(self, projects: List[ProjectInfo], iterations: int,
+                 output_path: str):
+        self.projects = projects
+        self.iterations = iterations
+        self.out = output_path
+
+    def run(self):
+        results = [self._benchmark_project(project)
+                   for project in self.projects]
+
+        data = pd.concat(results, ignore_index=True)
+        _save(data, self.out)
+
+    def _benchmark_project(self, project: ProjectInfo) -> pd.DataFrame:
+        if not project.enabled:
+            stdout(f" \n\n--- Skipping disabled project {project.name}\n")
+            return
+
+        stdout(f" \n\n--- Benchmarking project {project.name}\n")
+
+        test_info = TestInfo(project)
+        tester = ProjectTester(test_info, silent=True)
+        project_dir = tester.get_project_dir()
+        output_dir = tester.get_output_dir()
+
+        raw_data = []
+
+        for i in range(self.iterations):
+            stdout(f"Iteration #{i + 1}")
+            time, mem = tester.build(project_dir, output_dir)
+            raw_data.append({"time": time, "memory": mem,
+                             "iteration": i, "project": project.name})
+            stdout(f"time: {utils.time_to_str(time)}, "
+                   f"peak memory: {utils.memory_to_str(mem)}")
+
+        return pd.DataFrame(raw_data)
+
+
+ """ + def __init__(self, projects: List[ProjectInfo], iterations: int, + output_path: str): + self.projects = projects + self.iterations = iterations + self.out = output_path + + def run(self): + results = [self._benchmark_project(project) + for project in self.projects] + + data = pd.concat(results, ignore_index=True) + _save(data, self.out) + + def _benchmark_project(self, project: ProjectInfo) -> pd.DataFrame: + if not project.enabled: + stdout(f" \n\n--- Skipping disabled project {project.name}\n") + return + + stdout(f" \n\n--- Benchmarking project {project.name}\n") + + test_info = TestInfo(project) + tester = ProjectTester(test_info, silent=True) + project_dir = tester.get_project_dir() + output_dir = tester.get_output_dir() + + raw_data = [] + + for i in range(self.iterations): + stdout(f"Iteration #{i + 1}") + time, mem = tester.build(project_dir, output_dir) + raw_data.append({"time": time, "memory": mem, + "iteration": i, "project": project.name}) + stdout(f"time: {utils.time_to_str(time)}, " + f"peak memory: {utils.memory_to_str(mem)}") + + return pd.DataFrame(raw_data) + + +def compare(old_path: str, new_path: str, plot_file: str): + """ + Compare two benchmarking results stored as .csv files + and produce a plot in the specified file. + """ + old = _load(old_path) + new = _load(new_path) + + old_projects = set(old["project"]) + new_projects = set(new["project"]) + common_projects = old_projects & new_projects + + # Leave only rows for projects common to both dataframes. + old = old[old["project"].isin(common_projects)] + new = new[new["project"].isin(common_projects)] + + old, new = _normalize(old, new) + + # Seaborn prefers all the data to be in one dataframe. + old["kind"] = "old" + new["kind"] = "new" + data = pd.concat([old, new], ignore_index=True) + + # TODO: compare data in old and new dataframes using statistical tests + # to check if they belong to the same distribution + _plot(data, plot_file) + + +def _normalize(old: pd.DataFrame, + new: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]: + # This creates a dataframe with all numerical data averaged. + means = old.groupby("project").mean() + return _normalize_impl(old, means), _normalize_impl(new, means) + + +def _normalize_impl(data: pd.DataFrame, means: pd.DataFrame): + # Right now 'means' has one row corresponding to one project, + # while 'data' has N rows for each project (one for each iteration). + # + # In order for us to work easier with this data, we duplicate + # 'means' data to match the size of the 'data' dataframe. + # + # All the columns from 'data' will maintain their names, while + # new columns coming from 'means' will have "_mean" suffix. + joined_data = data.merge(means, on="project", suffixes=("", "_mean")) + _normalize_key(joined_data, "time") + _normalize_key(joined_data, "memory") + return joined_data + + +def _normalize_key(data: pd.DataFrame, key: str): + norm_key = _normalized_name(key) + mean_key = f"{key}_mean" + data[norm_key] = data[key] / data[mean_key] + + +def _normalized_name(name: str) -> str: + return f"normalized {name}" + + +def _plot(data: pd.DataFrame, plot_file: str): + import matplotlib + import seaborn as sns + from matplotlib import pyplot as plt + + sns.set_style("whitegrid") + # We want to have time and memory charts one above the other. 
diff --git a/clang/utils/analyzer/SATestBuild.py b/clang/utils/analyzer/SATestBuild.py
--- a/clang/utils/analyzer/SATestBuild.py
+++ b/clang/utils/analyzer/SATestBuild.py
@@ -87,10 +87,18 @@
         return 0


-Logger = logging.getLogger("main")
 LOCAL = threading.local()
-LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
-LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
+
+
+def init_logger(name: str):
+    # TODO: use debug levels for VERBOSE messages
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.DEBUG)
+    LOCAL.stdout = StreamToLogger(logger, logging.INFO)
+    LOCAL.stderr = StreamToLogger(logger, logging.ERROR)
+
+
+init_logger("main")


 def stderr(message: str):
@@ -102,7 +110,6 @@


 logging.basicConfig(
-    level=logging.DEBUG,
     format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')


@@ -298,12 +305,13 @@
     """
    A component aggregating testing for one project.
     """
-    def __init__(self, test_info: TestInfo):
+    def __init__(self, test_info: TestInfo, silent: bool = False):
         self.project = test_info.project
         self.override_compiler = test_info.override_compiler
         self.extra_analyzer_config = test_info.extra_analyzer_config
         self.is_reference_build = test_info.is_reference_build
         self.strictness = test_info.strictness
+        self.silent = silent

     def test(self) -> bool:
         """
@@ -312,20 +320,19 @@
         to the :param strictness: criteria.
         """
         if not self.project.enabled:
-            stdout(f" \n\n--- Skipping disabled project {self.project.name}\n")
+            self.out(
+                f" \n\n--- Skipping disabled project {self.project.name}\n")
             return True

-        stdout(f" \n\n--- Building project {self.project.name}\n")
+        self.out(f" \n\n--- Building project {self.project.name}\n")
         start_time = time.time()

         project_dir = self.get_project_dir()
-        if VERBOSE >= 1:
-            stdout(f" Build directory: {project_dir}.\n")
+        self.vout(f" Build directory: {project_dir}.\n")

         # Set the build results directory.
         output_dir = self.get_output_dir()
-        output_dir = os.path.join(project_dir, output_dir)

         self.build(project_dir, output_dir)
         check_build(output_dir)

@@ -336,8 +343,8 @@
         else:
             passed = run_cmp_results(project_dir, self.strictness)

-        stdout(f"Completed tests for project {self.project.name} "
-               f"(time: {time.time() - start_time:.2f}).\n")
+        self.out(f"Completed tests for project {self.project.name} "
+                 f"(time: {time.time() - start_time:.2f}).\n")

         return passed

@@ -346,22 +353,23 @@

     def get_output_dir(self) -> str:
         if self.is_reference_build:
-            return REF_PREFIX + OUTPUT_DIR_NAME
+            dirname = REF_PREFIX + OUTPUT_DIR_NAME
         else:
-            return OUTPUT_DIR_NAME
+            dirname = OUTPUT_DIR_NAME
+
+        return os.path.join(self.get_project_dir(), dirname)

-    def build(self, directory: str, output_dir: str):
+    def build(self, directory: str, output_dir: str) -> Tuple[float, int]:
         build_log_path = get_build_log_path(output_dir)
-        stdout(f"Log file: {build_log_path}\n")
-        stdout(f"Output directory: {output_dir}\n")
+        self.out(f"Log file: {build_log_path}\n")
+        self.out(f"Output directory: {output_dir}\n")

         remove_log_file(output_dir)

         # Clean up scan build results.
         if os.path.exists(output_dir):
-            if VERBOSE >= 1:
-                stdout(f" Removing old results: {output_dir}\n")
+            self.vout(f" Removing old results: {output_dir}\n")

             shutil.rmtree(output_dir)

@@ -374,7 +382,7 @@
             self._download_and_patch(directory, build_log_file)
             run_cleanup_script(directory, build_log_file)
             build_time, memory = self.scan_build(directory, output_dir,
-                                                     build_log_file)
+                                                 build_log_file)
         else:
             build_time, memory = self.analyze_preprocessed(directory,
                                                            output_dir)
@@ -384,9 +392,11 @@
             normalize_reference_results(directory, output_dir,
                                         self.project.mode)

-        stdout(f"Build complete (time: {utils.time_to_str(build_time)}, "
-               f"peak memory: {utils.memory_to_str(memory)}). "
-               f"See the log for more details: {build_log_path}\n")
+        self.out(f"Build complete (time: {utils.time_to_str(build_time)}, "
+                 f"peak memory: {utils.memory_to_str(memory)}). "
+                 f"See the log for more details: {build_log_path}\n")
+
+        return build_time, memory

     def scan_build(self, directory: str, output_dir: str,
                    build_log_file: IO) -> Tuple[float, int]:
@@ -454,8 +464,7 @@

         command_to_run = command_prefix + command

-        if VERBOSE >= 1:
-            stdout(f" Executing: {command_to_run}\n")
+        self.vout(f" Executing: {command_to_run}\n")

         time, mem = utils.check_and_measure_call(
             command_to_run, cwd=cwd,
@@ -522,8 +531,7 @@
             log_path = os.path.join(fail_path, file_name + ".stderr.txt")
             with open(log_path, "w+") as log_file:
                 try:
-                    if VERBOSE >= 1:
-                        stdout(f" Executing: {command}\n")
+                    self.vout(f" Executing: {command}\n")

                     time, mem = utils.check_and_measure_call(
                         command, cwd=directory, stderr=log_file,
@@ -592,8 +600,10 @@
                 f"for the '{self.project.name}' project")

     def _download_from_git(self, directory: str, build_log_file: IO):
+        repo = self.project.origin
         cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)
-        check_call(f"git clone --recursive {self.project.origin} {cached_source}",
+
+        check_call(f"git clone --recursive {repo} {cached_source}",
                    cwd=directory, stderr=build_log_file,
                    stdout=build_log_file, shell=True)
         check_call(f"git checkout --quiet {self.project.commit}",
@@ -624,16 +634,15 @@
                         out=LOCAL.stdout, err=LOCAL.stderr,
                         verbose=VERBOSE)

-    @staticmethod
-    def _apply_patch(directory: str, build_log_file: IO):
+    def _apply_patch(self, directory: str, build_log_file: IO):
         patchfile_path = os.path.join(directory, PATCHFILE_NAME)
         patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

         if not os.path.exists(patchfile_path):
-            stdout(" No local patches.\n")
+            self.out(" No local patches.\n")
             return

-        stdout(" Applying patch.\n")
+        self.out(" Applying patch.\n")
         try:
             check_call(f"patch -p1 < '{patchfile_path}'",
                        cwd=patched_source,
@@ -646,6 +655,14 @@
                    f"See {build_log_file.name} for details.\n")
             sys.exit(1)

+    def out(self, what: str):
+        if not self.silent:
+            stdout(what)
+
+    def vout(self, what: str):
+        if VERBOSE >= 1:
+            self.out(what)
+

 class TestProjectThread(threading.Thread):
     def __init__(self, tasks_queue: TestQueue,
@@ -668,10 +685,7 @@
         while not self.tasks_queue.empty():
             try:
                 test_info = self.tasks_queue.get()
-
-                Logger = logging.getLogger(test_info.project.name)
-                LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
-                LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
+                init_logger(test_info.project.name)

                 tester = ProjectTester(test_info)
                 if not tester.test():
diff --git a/clang/utils/analyzer/SATestUpdateDiffs.py b/clang/utils/analyzer/SATestUpdateDiffs.py
--- a/clang/utils/analyzer/SATestUpdateDiffs.py
+++ b/clang/utils/analyzer/SATestUpdateDiffs.py
@@ -21,10 +21,10 @@
     project_dir = tester.get_project_dir()

     tester.is_reference_build = True
-    ref_results_path = os.path.join(project_dir, tester.get_output_dir())
+    ref_results_path = tester.get_output_dir()
     tester.is_reference_build = False
-    created_results_path = os.path.join(project_dir, tester.get_output_dir())
+    created_results_path = tester.get_output_dir()

     if not os.path.exists(created_results_path):
         print("New results not found, was SATestBuild.py previously run?",
diff --git a/clang/utils/analyzer/requirements.txt b/clang/utils/analyzer/requirements.txt
--- a/clang/utils/analyzer/requirements.txt
+++ b/clang/utils/analyzer/requirements.txt
@@ -1,4 +1,6 @@
 graphviz
 humanize
 matplotlib
+pandas
 psutil
+seaborn
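
The CSV written by Benchmark.run() is plain pandas output: an "index" column (the index label used by _save/_load) plus one row per iteration with "time", "memory", "iteration" and "project" columns. A quick sketch for inspecting a single run without going through the compare subcommand:

    import pandas as pd

    data = pd.read_csv("benchmark.csv", index_col="index")
    # Mean and spread of analysis time and peak memory, per project.
    print(data.groupby("project")[["time", "memory"]].agg(["mean", "std"]))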