class Size(int, Enum):
    """
    Size of the project.

    Sizes do not directly correspond to the number of lines or files in the
    project.  The key factor that is important for the developers of the
    analyzer is the time it takes to analyze the project.  Here is how
    the following sizes map to times:

    TINY:  <1min
    SMALL: 1min-10min
    BIG:   10min-1h
    HUGE:  >1h

    The borders are a bit of a blur, especially because analysis time varies
    from one machine to another.  However, the relative times will stay pretty
    similar, and these groupings will still be helpful.

    Members are plain integers numbered by auto() in declaration order, so
    they can be compared directly (TINY < SMALL < BIG < HUGE < UNSPECIFIED).

    UNSPECIFIED is a very special case, which is intentionally last in the list
    of possible sizes.  If the user wants to filter projects by one of the
    possible sizes, we want projects with UNSPECIFIED size to be filtered out
    for any given size.
    """
    TINY = auto()
    SMALL = auto()
    BIG = auto()
    HUGE = auto()
    UNSPECIFIED = auto()

    @classmethod
    def from_str(cls, raw_size: Optional[str]) -> "Size":
        """
        Construct a Size object from an optional string.

        :param raw_size: optional string representation of the desired Size
                         object.  None will produce UNSPECIFIED size.
        :return: the Size member whose name matches raw_size.
        :raises ValueError: if raw_size is not None and does not name a size
                            (this includes values that are not strings).

        This method is case-insensitive and ignores surrounding whitespace,
        so raw sizes 'tiny', 'TINY', ' TiNy ' will produce the same result.
        """
        if raw_size is None:
            return cls.UNSPECIFIED

        # Anything that is not a string (e.g. a number in the JSON file)
        # falls through to the ValueError below instead of blowing up with
        # an AttributeError on .upper().
        if isinstance(raw_size, str):
            # The lookup is decoupled from the actual values of the enum,
            # so members can be added or modified without touching this
            # function.
            member = cls.__members__.get(raw_size.strip().upper())
            if member is not None:
                return member

        possible_sizes = [size.name.lower() for size in cls
                          # no need to show our users this size
                          if size is not cls.UNSPECIFIED]
        raise ValueError(f"Incorrect project size '{raw_size}'. "
                         f"Available sizes are {possible_sizes}")
projects_str.split(",") + def filter_projects(projects, predicate, force=False): + return [project.with_fields(enabled=(force or project.enabled) and + predicate(project)) + for project in projects] + + if args.projects: + projects_arg = args.projects.split(",") available_projects = [project.name for project in projects] @@ -113,8 +118,17 @@ "{all}.".format(project=manual_project, all=available_projects)) - projects = [project.with_fields(enabled=project.name in projects_arg) - for project in projects] + projects = filter_projects(projects, lambda project: + project.name in projects_arg, + force=True) + + try: + max_size = Size.from_str(args.max_size) + except ValueError as e: + parser.error(f"{e}") + + projects = filter_projects(projects, lambda project: + project.size <= max_size) return projects @@ -238,6 +252,8 @@ help="Arguments passed to to -analyzer-config") build_parser.add_argument("--projects", action="store", default="", help="Comma-separated list of projects to test") + build_parser.add_argument("--max-size", action="store", default=None, + help="Maximum size for the projects to test") build_parser.add_argument("-v", "--verbose", action="count", default=0) build_parser.set_defaults(func=build) @@ -317,6 +333,8 @@ help="Output csv file for the benchmark results") bench_parser.add_argument("--projects", action="store", default="", help="Comma-separated list of projects to test") + bench_parser.add_argument("--max-size", action="store", default=None, + help="Maximum size for the projects to test") bench_parser.set_defaults(func=benchmark) bench_subparsers = bench_parser.add_subparsers() diff --git a/clang/utils/analyzer/projects/projects.json b/clang/utils/analyzer/projects/projects.json --- a/clang/utils/analyzer/projects/projects.json +++ b/clang/utils/analyzer/projects/projects.json @@ -4,139 +4,159 @@ "mode": 1, "source": "git", "origin": "https://github.com/jarro2783/cxxopts.git", - "commit": "794c975" + "commit": "794c975", + "size": "tiny" }, { 
"name": "box2d", "mode": 1, "source": "git", "origin": "https://github.com/erincatto/box2d.git", - "commit": "1025f9a" + "commit": "1025f9a", + "size": "small" }, { "name": "tinyexpr", "mode": 1, "source": "git", "origin": "https://github.com/codeplea/tinyexpr.git", - "commit": "ffb0d41" + "commit": "ffb0d41", + "size": "tiny" }, { "name": "symengine", "mode": 1, "source": "git", "origin": "https://github.com/symengine/symengine.git", - "commit": "4f669d59" + "commit": "4f669d59", + "size": "small" }, { "name": "termbox", "mode": 1, "source": "git", "origin": "https://github.com/nsf/termbox.git", - "commit": "0df1355" + "commit": "0df1355", + "size": "tiny" }, { "name": "tinyvm", "mode": 1, "source": "git", "origin": "https://github.com/jakogut/tinyvm.git", - "commit": "10c25d8" + "commit": "10c25d8", + "size": "tiny" }, { "name": "tinyspline", "mode": 1, "source": "git", "origin": "https://github.com/msteinbeck/tinyspline.git", - "commit": "f8b1ab7" + "commit": "f8b1ab7", + "size": "tiny" }, { "name": "oatpp", "mode": 1, "source": "git", "origin": "https://github.com/oatpp/oatpp.git", - "commit": "d3e60fb" + "commit": "d3e60fb", + "size": "small" }, { "name": "libsoundio", "mode": 1, "source": "git", "origin": "https://github.com/andrewrk/libsoundio.git", - "commit": "b810bf2" + "commit": "b810bf2", + "size": "tiny" }, { "name": "zstd", "mode": 1, "source": "git", "origin": "https://github.com/facebook/zstd.git", - "commit": "2af4e073" + "commit": "2af4e073", + "size": "small" }, { "name": "simbody", "mode": 1, "source": "git", "origin": "https://github.com/simbody/simbody.git", - "commit": "5cf513d" + "commit": "5cf513d", + "size": "big" }, { "name": "duckdb", "mode": 1, "source": "git", "origin": "https://github.com/cwida/duckdb.git", - "commit": "d098c9f" + "commit": "d098c9f", + "size": "big" }, { "name": "drogon", "mode": 1, "source": "git", "origin": "https://github.com/an-tao/drogon.git", - "commit": "fd2a612" + "commit": "fd2a612", + "size": "small" }, { 
"name": "fmt", "mode": 1, "source": "git", "origin": "https://github.com/fmtlib/fmt.git", - "commit": "5e7c70e" + "commit": "5e7c70e", + "size": "small" }, { "name": "re2", "mode": 1, "source": "git", "origin": "https://github.com/google/re2.git", - "commit": "2b25567" + "commit": "2b25567", + "size": "small" }, { "name": "cppcheck", "mode": 1, "source": "git", "origin": "https://github.com/danmar/cppcheck.git", - "commit": "5fa3d53" + "commit": "5fa3d53", + "size": "small" }, { "name": "harfbuzz", "mode": 1, "source": "git", "origin": "https://github.com/harfbuzz/harfbuzz.git", - "commit": "f8d345e" + "commit": "f8d345e", + "size": "small" }, { "name": "capnproto", "mode": 1, "source": "git", "origin": "https://github.com/capnproto/capnproto.git", - "commit": "8be1c9f" + "commit": "8be1c9f", + "size": "small" }, { "name": "tmux", "mode": 1, "source": "git", "origin": "https://github.com/tmux/tmux.git", - "commit": "a5f99e1" + "commit": "a5f99e1", + "size": "big" }, { "name": "faiss", "mode": 1, "source": "git", "origin": "https://github.com/facebookresearch/faiss.git", - "commit": "9e5d5b7" + "commit": "9e5d5b7", + "size": "small" } ]