diff --git a/clang/utils/analyzer/ProjectMap.py b/clang/utils/analyzer/ProjectMap.py new file mode 100644 --- /dev/null +++ b/clang/utils/analyzer/ProjectMap.py @@ -0,0 +1,94 @@ +import json +import os + +from typing import Any, Dict, List, NamedTuple, Optional + + +JSON = Dict[str, Any] + + +DEFAULT_MAP_FILE = "projects.json" + + +class ProjectInfo(NamedTuple): + """ + Information about a project to analyze. + """ + name: str + mode: int + enabled: bool = True + + +class ProjectMap: + """ + Project map stores info about all the "registered" projects. + """ + def __init__(self, path: Optional[str] = None, should_exist: bool = True): + """ + :param path: optional path to a project JSON file, when None defaults + to DEFAULT_MAP_FILE. + :param should_exist: flag to tell if it's an exceptional situation when + the project file doesn't exist, creates an empty + project list instead if we are not expecting it to + exist. + """ + if path is None: + path = os.path.join(os.path.abspath(os.curdir), DEFAULT_MAP_FILE) + + if not os.path.exists(path): + if should_exist: + raise ValueError( + f"Cannot find the project map file {path}" + f"\nRunning script for the wrong directory?\n") + else: + self._create_empty(path) + + self.path = path + self._load_projects() + + def save(self): + """ + Save project map back to its original file. 
+ """ + self._save(self.projects, self.path) + + def _load_projects(self): + with open(self.path) as raw_data: + raw_projects = json.load(raw_data) + + if not isinstance(raw_projects, list): + raise ValueError( + "Project map should be a list of JSON objects") + + self.projects = self._parse(raw_projects) + + @staticmethod + def _parse(raw_projects: List[JSON]) -> List[ProjectInfo]: + return [ProjectMap._parse_project(raw_project) + for raw_project in raw_projects] + + @staticmethod + def _parse_project(raw_project: JSON) -> ProjectInfo: + try: + name: str = raw_project["name"] + build_mode: int = raw_project["mode"] + enabled: bool = raw_project.get("enabled", True) + return ProjectInfo(name, build_mode, enabled) + + except KeyError as e: + raise ValueError( + f"Project info is required to have a '{e.args[0]}' field") + + @staticmethod + def _create_empty(path: str): + ProjectMap._save([], path) + + @staticmethod + def _save(projects: List[ProjectInfo], path: str): + with open(path, "w") as output: + json.dump(ProjectMap._convert_infos_to_dicts(projects), + output, indent=2) + + @staticmethod + def _convert_infos_to_dicts(projects: List[ProjectInfo]) -> List[JSON]: + return [project._asdict() for project in projects] diff --git a/clang/utils/analyzer/SATestAdd.py b/clang/utils/analyzer/SATestAdd.py --- a/clang/utils/analyzer/SATestAdd.py +++ b/clang/utils/analyzer/SATestAdd.py @@ -43,13 +43,11 @@ > changes_for_analyzer.patch """ import SATestBuild +from ProjectMap import ProjectMap, ProjectInfo -import csv import os import sys -from typing import IO - def add_new_project(name: str, build_mode: int): """ @@ -57,9 +55,10 @@ :param name: is a short string used to identify a project. 
""" - project_info = SATestBuild.ProjectInfo(name, build_mode, - is_reference_build=True) - tester = SATestBuild.ProjectTester(project_info) + project_info = ProjectInfo(name, build_mode) + test_info = SATestBuild.TestInfo(project_info, + is_reference_build=True) + tester = SATestBuild.ProjectTester(test_info) project_dir = tester.get_project_dir() if not os.path.exists(project_dir): @@ -70,33 +69,20 @@ tester.test() # Add the project name to the project map. - project_map_path = SATestBuild.get_project_map_path(should_exist=False) + project_map = ProjectMap(should_exist=False) - if os.path.exists(project_map_path): - file_mode = "r+" + if is_existing_project(project_map, name): + print(f"Warning: Project with name '{name}' already exists.", + file=sys.stdout) + print("Reference output has been regenerated.", file=sys.stdout) else: - print("Warning: Creating the project map file!") - file_mode = "w+" - - with open(project_map_path, file_mode) as map_file: - if is_existing_project(map_file, name): - print(f"Warning: Project with name '{name}' already exists.", - file=sys.stdout) - print("Reference output has been regenerated.", file=sys.stdout) - else: - map_writer = csv.writer(map_file) - map_writer.writerow((name, build_mode)) - print(f"The project map is updated: {project_map_path}") - - -def is_existing_project(map_file: IO, project_name: str) -> bool: - map_reader = csv.reader(map_file) + project_map.projects.append(project_info) + project_map.save() - for raw_info in map_reader: - if project_name == raw_info[0]: - return True - return False +def is_existing_project(project_map: ProjectMap, project_name: str) -> bool: + return any(existing_project.name == project_name + for existing_project in project_map.projects) # TODO: Use argparse diff --git a/clang/utils/analyzer/SATestBuild.py b/clang/utils/analyzer/SATestBuild.py --- a/clang/utils/analyzer/SATestBuild.py +++ b/clang/utils/analyzer/SATestBuild.py @@ -44,9 +44,9 @@ """ import CmpRuns import SATestUtils 
+from ProjectMap import ProjectInfo, ProjectMap import argparse -import csv import glob import logging import math @@ -59,9 +59,11 @@ import time from queue import Queue +# mypy has problems finding InvalidFileException in the module +# and this is how we can shush that false positive +from plistlib import InvalidFileException # type:ignore from subprocess import CalledProcessError, check_call -from typing import (cast, Dict, Iterable, IO, List, NamedTuple, Optional, - Tuple, TYPE_CHECKING) +from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING ############################################################################### @@ -105,9 +107,6 @@ # Number of jobs. MAX_JOBS = int(math.ceil(multiprocessing.cpu_count() * 0.75)) -# Project map stores info about all the "registered" projects. -PROJECT_MAP_FILE = "projectMap.csv" - # Names of the project specific scripts. # The script that downloads the project. DOWNLOAD_SCRIPT = "download_project.sh" @@ -187,18 +186,6 @@ ############################################################################### -def get_project_map_path(should_exist: bool = True) -> str: - project_map_path = os.path.join(os.path.abspath(os.curdir), - PROJECT_MAP_FILE) - - if should_exist and not os.path.exists(project_map_path): - stderr(f"Error: Cannot find the project map file {project_map_path}" - f"\nRunning script for the wrong directory?\n") - sys.exit(1) - - return project_map_path - - def run_cleanup_script(directory: str, build_log_file: IO): """ Run pre-processing script if any. @@ -268,12 +255,11 @@ sys.exit(1) -class ProjectInfo(NamedTuple): +class TestInfo(NamedTuple): """ Information about a project and settings for its analysis. 
""" - name: str - build_mode: int + project: ProjectInfo override_compiler: bool = False extra_analyzer_config: str = "" is_reference_build: bool = False @@ -287,9 +273,9 @@ # It is a common workaround for this situation: # https://mypy.readthedocs.io/en/stable/common_issues.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime if TYPE_CHECKING: - ProjectQueue = Queue[ProjectInfo] # this is only processed by mypy + TestQueue = Queue[TestInfo] # this is only processed by mypy else: - ProjectQueue = Queue # this will be executed at runtime + TestQueue = Queue # this will be executed at runtime class RegressionTester: @@ -306,25 +292,24 @@ self.strictness = strictness def test_all(self) -> bool: - projects_to_test: List[ProjectInfo] = [] - - with open(get_project_map_path(), "r") as map_file: - validate_project_file(map_file) - - # Test the projects. - for proj_name, proj_build_mode in get_projects(map_file): - projects_to_test.append( - ProjectInfo(proj_name, int(proj_build_mode), - self.override_compiler, - self.extra_analyzer_config, - self.regenerate, self.strictness)) + projects_to_test: List[TestInfo] = [] + + project_map = ProjectMap() + + # Test the projects. + for project in project_map.projects: + projects_to_test.append( + TestInfo(project, + self.override_compiler, + self.extra_analyzer_config, + self.regenerate, self.strictness)) if self.jobs <= 1: return self._single_threaded_test_all(projects_to_test) else: return self._multi_threaded_test_all(projects_to_test) def _single_threaded_test_all(self, - projects_to_test: List[ProjectInfo]) -> bool: + projects_to_test: List[TestInfo]) -> bool: """ Run all projects. :return: whether tests have passed. @@ -336,7 +321,7 @@ return success def _multi_threaded_test_all(self, - projects_to_test: List[ProjectInfo]) -> bool: + projects_to_test: List[TestInfo]) -> bool: """ Run each project in a separate thread. @@ -345,7 +330,7 @@ :return: whether tests have passed. 
""" - tasks_queue = ProjectQueue() + tasks_queue = TestQueue() for project_info in projects_to_test: tasks_queue.put(project_info) @@ -370,13 +355,12 @@ """ A component aggregating testing for one project. """ - def __init__(self, project_info: ProjectInfo): - self.project_name = project_info.name - self.build_mode = project_info.build_mode - self.override_compiler = project_info.override_compiler - self.extra_analyzer_config = project_info.extra_analyzer_config - self.is_reference_build = project_info.is_reference_build - self.strictness = project_info.strictness + def __init__(self, test_info: TestInfo): + self.project = test_info.project + self.override_compiler = test_info.override_compiler + self.extra_analyzer_config = test_info.extra_analyzer_config + self.is_reference_build = test_info.is_reference_build + self.strictness = test_info.strictness def test(self) -> bool: """ @@ -384,7 +368,11 @@ :return tests_passed: Whether tests have passed according to the :param strictness: criteria. """ - stdout(f" \n\n--- Building project {self.project_name}\n") + if not self.project.enabled: + stdout(f" \n\n--- Skipping disabled project {self.project.name}\n") + return True + + stdout(f" \n\n--- Building project {self.project.name}\n") start_time = time.time() @@ -405,13 +393,13 @@ else: passed = run_cmp_results(project_dir, self.strictness) - stdout(f"Completed tests for project {self.project_name} " + stdout(f"Completed tests for project {self.project.name} " f"(time: {time.time() - start_time:.2f}).\n") return passed def get_project_dir(self) -> str: - return os.path.join(os.path.abspath(os.curdir), self.project_name) + return os.path.join(os.path.abspath(os.curdir), self.project.name) def get_output_dir(self) -> str: if self.is_reference_build: @@ -441,7 +429,7 @@ # Build and analyze the project. 
with open(build_log_path, "w+") as build_log_file: - if self.build_mode == 1: + if self.project.mode == 1: download_and_patch(directory, build_log_file) run_cleanup_script(directory, build_log_file) self.scan_build(directory, output_dir, build_log_file) @@ -451,7 +439,7 @@ if self.is_reference_build: run_cleanup_script(directory, build_log_file) normalize_reference_results(directory, output_dir, - self.build_mode) + self.project.mode) stdout(f"Build complete (time: {time.time() - time_start:.2f}). " f"See the log for more details: {build_log_path}\n") @@ -549,7 +537,7 @@ prefix += " -Xclang -analyzer-config " prefix += f"-Xclang {self.generate_config()} " - if self.build_mode == 2: + if self.project.mode == 2: prefix += "-std=c++11 " plist_path = os.path.join(directory, output_dir, "date") @@ -601,7 +589,7 @@ class TestProjectThread(threading.Thread): - def __init__(self, tasks_queue: ProjectQueue, + def __init__(self, tasks_queue: TestQueue, results_differ: threading.Event, failure_flag: threading.Event): """ @@ -621,13 +609,13 @@ def run(self): while not self.tasks_queue.empty(): try: - project_info = self.tasks_queue.get() + test_info = self.tasks_queue.get() - Logger = logging.getLogger(project_info.name) + Logger = logging.getLogger(test_info.project.name) LOCAL.stdout = StreamToLogger(Logger, logging.INFO) LOCAL.stderr = StreamToLogger(Logger, logging.ERROR) - tester = ProjectTester(project_info) + tester = ProjectTester(test_info) if not tester.test(): self.results_differ.set() @@ -841,7 +829,7 @@ os.remove(plist) continue - except plistlib.InvalidFileException as e: + except InvalidFileException as e: stderr(f"Error parsing plist file {plist}: {str(e)}") continue @@ -856,36 +844,6 @@ os.removedirs(subdir) -def get_projects(map_file: IO) -> Iterable[Tuple[str, str]]: - """ - Iterate over all projects defined in the project file handler `map_file` - from the start. 
- """ - map_file.seek(0) - # TODO: csv format is not very readable, change it to JSON - for project_info in csv.reader(map_file): - if SATestUtils.is_comment_csv_line(project_info): - continue - # suppress mypy error - yield cast(Tuple[str, str], project_info) - - -def validate_project_file(map_file: IO): - """ - Validate project file. - """ - for project_info in get_projects(map_file): - if len(project_info) != 2: - stderr("Error: Rows in the project map file " - "should have 2 entries.") - raise Exception() - - if project_info[1] not in ('0', '1', '2'): - stderr("Error: Second entry in the project map file should be 0" - " (single file), 1 (project), or 2(single file c++11).") - raise Exception() - - if __name__ == "__main__": # Parse command line arguments. parser = argparse.ArgumentParser( diff --git a/clang/utils/analyzer/SATestUpdateDiffs.py b/clang/utils/analyzer/SATestUpdateDiffs.py --- a/clang/utils/analyzer/SATestUpdateDiffs.py +++ b/clang/utils/analyzer/SATestUpdateDiffs.py @@ -4,6 +4,7 @@ Update reference results for static analyzer. """ import SATestBuild +from ProjectMap import ProjectInfo, ProjectMap import os import shutil @@ -14,9 +15,9 @@ Verbose = 0 -def update_reference_results(project_name: str, build_mode: int): - project_info = SATestBuild.ProjectInfo(project_name, build_mode) - tester = SATestBuild.ProjectTester(project_info) +def update_reference_results(project: ProjectInfo): + test_info = SATestBuild.TestInfo(project) + tester = SATestBuild.ProjectTester(test_info) project_dir = tester.get_project_dir() tester.is_reference_build = True @@ -54,7 +55,7 @@ SATestBuild.run_cleanup_script(project_dir, build_log_file) SATestBuild.normalize_reference_results( - project_dir, ref_results_path, build_mode) + project_dir, ref_results_path, project.mode) # Clean up the generated difference results. 
SATestBuild.cleanup_reference_results(ref_results_path) @@ -62,6 +63,7 @@ run_cmd(f"git add '{ref_results_path}'") +# TODO: use argparse def main(argv): if len(argv) == 2 and argv[1] in ("-h", "--help"): print("Update static analyzer reference results based " @@ -70,9 +72,9 @@ file=sys.stderr) sys.exit(1) - with open(SATestBuild.get_project_map_path(), "r") as f: - for project_name, build_mode in SATestBuild.get_projects(f): - update_reference_results(project_name, int(build_mode)) + project_map = ProjectMap() + for project in project_map.projects: + update_reference_results(project) if __name__ == '__main__':