Index: clang/tools/scan-build-py/libscanbuild/analyze.py
===================================================================
--- clang/tools/scan-build-py/libscanbuild/analyze.py
+++ clang/tools/scan-build-py/libscanbuild/analyze.py
@@ -52,7 +52,8 @@
 
     args = parse_args_for_scan_build()
     # will re-assign the report directory as new output
-    with report_directory(args.output, args.keep_empty) as args.output:
+    with report_directory(
+            args.output, args.keep_empty, args.output_format) as args.output:
         # Run against a build command. there are cases, when analyzer run
         # is not required. But we need to set up everything for the
         # wrappers, because 'configure' needs to capture the CC/CXX values
@@ -79,7 +80,8 @@
 
     args = parse_args_for_analyze_build()
     # will re-assign the report directory as new output
-    with report_directory(args.output, args.keep_empty) as args.output:
+    with report_directory(
+            args.output, args.keep_empty, args.output_format) as args.output:
         # Run the analyzer against a compilation db.
         govern_analyzer_runs(args)
         # Cover report generation and bug counting.
@@ -336,7 +338,7 @@
 
 
 @contextlib.contextmanager
-def report_directory(hint, keep):
+def report_directory(hint, keep, output_format):
     """ Responsible for the report directory.
 
     hint -- could specify the parent directory of the output directory.
@@ -355,7 +357,11 @@
         yield name
     finally:
         if os.listdir(name):
-            msg = "Run 'scan-view %s' to examine bug reports."
+            if output_format != 'sarif':
+                # 'scan-view' currently does not support sarif format.
+                msg = "Run 'scan-view %s' to examine bug reports."
+            else:
+                msg = "View result at %s/results-merged.sarif."
             keep = True
         else:
             if keep:
@@ -433,7 +439,8 @@
           'direct_args',  # arguments from command line
           'force_debug',  # kill non debug macros
           'output_dir',  # where generated report files shall go
-          'output_format',  # it's 'plist', 'html', both or plist-multi-file
+          'output_format',  # it's 'plist', 'html', 'plist-html',
+                            # 'plist-multi-file' or 'sarif'
           'output_failures',  # generate crash reports or not
           'ctu'])  # ctu control options
 def run(opts):
@@ -537,6 +544,12 @@
                                               dir=opts['output_dir'])
             os.close(handle)
             return name
+        elif opts['output_format'] == 'sarif':
+            (handle, name) = tempfile.mkstemp(prefix='result-',
+                                              suffix='.sarif',
+                                              dir=opts['output_dir'])
+            os.close(handle)
+            return name
         return opts['output_dir']
 
     try:
Index: clang/tools/scan-build-py/libscanbuild/arguments.py
===================================================================
--- clang/tools/scan-build-py/libscanbuild/arguments.py
+++ clang/tools/scan-build-py/libscanbuild/arguments.py
@@ -244,6 +244,14 @@
         action='store_const',
         help="""Cause the results as a set of .plist files with extra
         information on related files.""")
+    format_group.add_argument(
+        '--sarif',
+        '-sarif',
+        dest='output_format',
+        const='sarif',
+        default='html',
+        action='store_const',
+        help="""Cause the results as a results-merged.sarif file.""")
 
     advanced = parser.add_argument_group('advanced options')
     advanced.add_argument(
Index: clang/tools/scan-build-py/libscanbuild/report.py
===================================================================
--- clang/tools/scan-build-py/libscanbuild/report.py
+++ clang/tools/scan-build-py/libscanbuild/report.py
@@ -27,6 +27,7 @@
     """ Generates cover report and returns the number of bugs/crashes. """
 
     html_reports_available = args.output_format in {'html', 'plist-html'}
+    sarif_reports_available = args.output_format in {'sarif'}
 
     logging.debug('count crashes and bugs')
     crash_count = sum(1 for _ in read_crashes(args.output))
@@ -57,6 +58,11 @@
         finally:
             for fragment in fragments:
                 os.remove(fragment)
+
+    if sarif_reports_available:
+        logging.debug('merging sarif files')
+        merge_sarif_files(args.output)
+
     return result
 
 
@@ -277,6 +283,95 @@
         if not duplicate(bug):
             yield bug
 
+
+def merge_sarif_files(output_dir, sort_files=False):
+    """ Reads and merges all .sarif files in the given output directory.
+
+    Each sarif file in the output directory is understood as a single run
+    and thus appears separately in the top level runs array. This requires
+    modifying the run index of any embedded links in messages.
+
+    output_dir -- the directory of the .sarif files; the merged result is
+                  written there too, as 'results-merged.sarif'.
+    sort_files -- merge the files in sorted name order. (Exposed for
+                  testing, since glob does not guarantee any order.) """
+
+    merged_name = 'results-merged.sarif'
+    # we only merge runs, so we only need to update the run index
+    run_link = re.compile(r'sarif:/runs/(\d+)')
+
+    def empty(file_name):
+        return os.stat(file_name).st_size == 0
+
+    def mergeable(file_name):
+        # the result of a previous merge must not be merged into itself
+        return (os.path.basename(file_name) != merged_name
+                and not empty(file_name))
+
+    def update_sarif_object(sarif_object, runs_count_offset):
+        """ Given a SARIF object, shifts the run index of the embedded
+        links in every 'message' property by the given offset.
+
+        Recursively looks through the dictionaries and lists. """
+
+        if isinstance(sarif_object, list):
+            return [update_sarif_object(entry, runs_count_offset)
+                    for entry in sarif_object]
+        if not isinstance(sarif_object, dict):
+            return sarif_object
+
+        if 'message' in sarif_object:
+            sarif_object['message'] = match_and_update_run(
+                sarif_object['message'], runs_count_offset)
+
+        # only values are replaced, so iterating the dictionary is safe
+        for key in sarif_object:
+            sarif_object[key] = update_sarif_object(
+                sarif_object[key], runs_count_offset)
+
+        return sarif_object
+
+    def match_and_update_run(message, runs_count_offset):
+        """ Given a SARIF message object, shifts the run index of the
+        embedded links in its 'text' property by the given offset. """
+
+        if not isinstance(message, dict) or 'text' not in message:
+            return message
+
+        def shifted(match):
+            run_index = runs_count_offset + int(match.group(1))
+            return 'sarif:/runs/{0}'.format(run_index)
+
+        message['text'] = run_link.sub(shifted, message['text'])
+        return message
+
+    sarif_files = filter(
+        mergeable, glob.iglob(os.path.join(output_dir, '*.sarif')))
+    if sort_files:
+        sarif_files = sorted(sarif_files)
+
+    runs_count = 0
+    merged = {}
+    for sarif_file in sarif_files:
+        with open(sarif_file) as fp:
+            sarif = json.load(fp)
+        if 'runs' not in sarif:
+            continue
+
+        # start with the first file
+        if not merged:
+            merged = sarif
+        else:
+            # extract the run and append it to the merged output
+            for run in sarif['runs']:
+                merged['runs'].append(
+                    update_sarif_object(run, runs_count))
+
+        runs_count += len(sarif['runs'])
+
+    with open(os.path.join(output_dir, merged_name), 'w') as out:
+        json.dump(merged, out, indent=4, sort_keys=True)
+
 
 def parse_bug_plist(filename):
     """ Returns the generator of bugs from a single .plist file. """
Index: clang/tools/scan-build-py/tests/unit/test_analyze.py
===================================================================
--- clang/tools/scan-build-py/tests/unit/test_analyze.py
+++ clang/tools/scan-build-py/tests/unit/test_analyze.py
@@ -128,7 +128,7 @@
 class RunAnalyzerTest(unittest.TestCase):
 
     @staticmethod
-    def run_analyzer(content, failures_report):
+    def run_analyzer(content, failures_report, output_format='plist'):
         with libear.TemporaryDirectory() as tmpdir:
             filename = os.path.join(tmpdir, 'test.cpp')
             with open(filename, 'w') as handle:
@@ -141,31 +141,45 @@
                 'direct_args': [],
                 'file': filename,
                 'output_dir': tmpdir,
-                'output_format': 'plist',
+                'output_format': output_format,
                 'output_failures': failures_report
             }
             spy = Spy()
             result = sut.run_analyzer(opts, spy.call)
-            return (result, spy.arg)
+            output_files = os.listdir(tmpdir)
+            return (result, spy.arg, output_files)
 
     def test_run_analyzer(self):
         content = "int div(int n, int d) { return n / d; }"
-        (result, fwds) = RunAnalyzerTest.run_analyzer(content, False)
+        (result, fwds, _) = RunAnalyzerTest.run_analyzer(content, False)
         self.assertEqual(None, fwds)
         self.assertEqual(0, result['exit_code'])
 
     def test_run_analyzer_crash(self):
         content = "int div(int n, int d) { return n / d }"
-        (result, fwds) = RunAnalyzerTest.run_analyzer(content, False)
+        (result, fwds, _) = RunAnalyzerTest.run_analyzer(content, False)
         self.assertEqual(None, fwds)
         self.assertEqual(1, result['exit_code'])
 
     def test_run_analyzer_crash_and_forwarded(self):
         content = "int div(int n, int d) { return n / d }"
-        (_, fwds) = RunAnalyzerTest.run_analyzer(content, True)
+        (_, fwds, _) = RunAnalyzerTest.run_analyzer(content, True)
         self.assertEqual(1, fwds['exit_code'])
         self.assertTrue(len(fwds['error_output']) > 0)
 
+    def test_run_analyzer_with_sarif(self):
+        content = "int div(int n, int d) { return n / d; }"
+        (result, fwds, output_files) = RunAnalyzerTest.run_analyzer(
+            content, False, output_format='sarif')
+        self.assertEqual(None, fwds)
+        self.assertEqual(0, result['exit_code'])
+
+        pattern = re.compile(r'^result-.+\.sarif$')
+        for f in output_files:
+            if re.match(pattern, f):
+                return
+        self.fail('no result sarif files found in output')
+
 
 class ReportFailureTest(unittest.TestCase):
 
Index: clang/tools/scan-build-py/tests/unit/test_report.py
===================================================================
--- clang/tools/scan-build-py/tests/unit/test_report.py
+++ clang/tools/scan-build-py/tests/unit/test_report.py
@@ -3,6 +3,7 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+import json
 import libear
 import libscanbuild.report as sut
 import unittest
@@ -145,3 +146,516 @@
     def test_empty(self):
         self.assertEqual(
             sut.commonprefix([]), '')
+
+class MergeSarifTest(unittest.TestCase):
+
+    def test_merging_sarif(self):
+        sarif1 = {
+            '$schema': 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
+            'runs': [
+                {
+                    'artifacts': [
+                        {
+                            'length': 100,
+                            'location': {
+                                'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
+                            },
+                            'mimeType': 'text/plain',
+                            'roles': [
+                                'resultFile'
+                            ]
+                        }
+                    ],
+                    'columnKind': 'unicodeCodePoints',
+                    'results': [
+                        {
+                            'codeFlows': [
+                                {
+                                    'threadFlows': [
+                                        {
+                                            'locations': [
+                                                {
+                                                    'importance': 'important',
+                                                    'location': {
+                                                        'message': {
+                                                            'text': 'test message 1'
+                                                        },
+                                                        'physicalLocation': {
+                                                            'artifactLocation': {
+                                                                'index': 0,
+                                                                'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
+                                                            },
+                                                            'region': {
+                                                                'endColumn': 5,
+                                                                'startColumn': 1,
+                                                                'startLine': 2
+                                                            }
+                                                        }
+                                                    }
+                                                }
+                                            ]
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            'codeFlows': [
+                                {
+                                    'threadFlows': [
+                                        {
+                                            'locations': [
+                                                {
+                                                    'importance': 'important',
+                                                    'location': {
+                                                        'message': {
+                                                            'text': 'test message 2'
+                                                        },
+                                                        'physicalLocation': {
+                                                            'artifactLocation': {
+                                                                'index': 0,
+                                                                'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
+                                                            },
+                                                            'region': {
+                                                                'endColumn': 23,
+                                                                'startColumn': 9,
+                                                                'startLine': 10
+                                                            }
+                                                        }
+                                                    }
+                                                }
+                                            ]
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ],
+                    'tool': {
+                        'driver': {
+                            'fullName': 'clang static analyzer',
+                            'language': 'en-US',
+                            'name': 'clang',
+                            'rules': [
+                                {
+                                    'fullDescription': {
+                                        'text': 'test rule for merge sarif test'
+                                    },
+                                    'helpUrl': '//clang/tools/scan-build-py/tests/unit/test_report.py',
+                                    'id': 'testId',
+                                    'name': 'testName'
+                                }
+                            ],
+                            'version': 'test clang'
+                        }
+                    }
+                }
+            ],
+            'version': '2.1.0'
+        }
+        sarif2 = {
+            '$schema': 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
+            'runs': [
+                {
+                    'artifacts': [
+                        {
+                            'length': 1523,
+                            'location': {
+                                'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
+                            },
+                            'mimeType': 'text/plain',
+                            'roles': [
+                                'resultFile'
+                            ]
+                        }
+                    ],
+                    'columnKind': 'unicodeCodePoints',
+                    'results': [
+                        {
+                            'codeFlows': [
+                                {
+                                    'threadFlows': [
+                                        {
+                                            'locations': [
+                                                {
+                                                    'importance': 'important',
+                                                    'location': {
+                                                        'message': {
+                                                            'text': 'test message 3'
+                                                        },
+                                                        'physicalLocation': {
+                                                            'artifactLocation': {
+                                                                'index': 0,
+                                                                'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
+                                                            },
+                                                            'region': {
+                                                                'endColumn': 99,
+                                                                'startColumn': 99,
+                                                                'startLine': 17
+                                                            }
+                                                        }
+                                                    }
+                                                }
+                                            ]
+                                        }
+                                    ]
+                                }
+                            ]
+                        },
+                        {
+                            'codeFlows': [
+                                {
+                                    'threadFlows': [
+                                        {
+                                            'locations': [
+                                                {
+                                                    'importance': 'important',
+                                                    'location': {
+                                                        'message': {
+                                                            'text': 'test message 4'
+                                                        },
+                                                        'physicalLocation': {
+                                                            'artifactLocation': {
+                                                                'index': 0,
+                                                                'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
+                                                            },
+                                                            'region': {
+                                                                'endColumn': 305,
+                                                                'startColumn': 304,
+                                                                'startLine': 1
+                                                            }
+                                                        }
+                                                    }
+                                                }
+                                            ]
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ],
+                    'tool': {
+                        'driver': {
+                            'fullName': 'clang static analyzer',
+                            'language': 'en-US',
+                            'name': 'clang',
+                            'rules': [
+                                {
+                                    'fullDescription': {
+                                        'text': 'test rule for merge sarif test'
+                                    },
+                                    'helpUrl': '//clang/tools/scan-build-py/tests/unit/test_report.py',
+                                    'id': 'testId',
+                                    'name': 'testName'
+                                }
+                            ],
+                            'version': 'test clang'
+                        }
+                    }
+                }
+            ],
+            'version': '2.1.0'
+        }
+
+        contents = [sarif1, sarif2]
+        with libear.TemporaryDirectory() as tmpdir:
+            for idx, content in enumerate(contents):
+                file_name = os.path.join(tmpdir, 'results-{}.sarif'.format(idx))
+                with open(file_name, 'w') as handle:
+                    json.dump(content, handle)
+
+            sut.merge_sarif_files(tmpdir, sort_files=True)
+
+            self.assertIn('results-merged.sarif', os.listdir(tmpdir))
+            with open(os.path.join(tmpdir, 'results-merged.sarif')) as f:
+                merged = json.load(f)
+                self.assertEqual(len(merged['runs']), 2)
+                self.assertEqual(len(merged['runs'][0]['results']), 2)
+                self.assertEqual(len(merged['runs'][1]['results']), 2)
+
+                expected = sarif1
+                for run in sarif2['runs']:
+                    expected['runs'].append(run)
+
+                self.assertEqual(merged, expected)
+
+    def test_merge_updates_embedded_link(self):
+        sarif1 = {
+            'runs': [
+                {
+                    'results': [
+                        {
+                            'codeFlows': [
+                                {
+                                    'message': {
+                                        'text': 'test message 1-1 [link](sarif:/runs/1/results/0) [link2](sarif:/runs/1/results/0)'
+                                    },
+                                    'threadFlows': [
+                                        {
+                                            'message': {
+                                                'text': 'test message 1-2 [link](sarif:/runs/1/results/0)'
+                                            }
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    'results': [
+                        {
+                            'codeFlows': [
+                                {
+                                    'message': {
+                                        'text': 'test message 2-1 [link](sarif:/runs/0/results/0)'
+                                    },
+                                    'threadFlows': [
+                                        {
+                                            'message': {
+                                                'text': 'test message 2-2 [link](sarif:/runs/0/results/0)'
+                                            }
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }
+        sarif2 = {
+            'runs': [
+                {
+                    'results': [
+                        {
+                            'codeFlows': [
+                                {
+                                    'message': {
+                                        'text': 'test message 3-1 [link](sarif:/runs/1/results/0) [link2](sarif:/runs/1/results/0)'
+                                    },
+                                    'threadFlows': [
+                                        {
+                                            'message': {
+                                                'text': 'test message 3-2 [link](sarif:/runs/1/results/0)'
+                                            }
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ],
+                },
+                {
+                    'results': [
+                        {
+                            'codeFlows': [
+                                {
+                                    'message': {
+                                        'text': 'test message 4-1 [link](sarif:/runs/0/results/0)'
+                                    },
+                                    'threadFlows': [
+                                        {
+                                            'message': {
+                                                'text': 'test message 4-2 [link](sarif:/runs/0/results/0)'
+                                            }
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }
+        sarif3 = {
+            'runs': [
+                {
+                    'results': [
+                        {
+                            'codeFlows': [
+                                {
+                                    'message': {
+                                        'text': 'test message 5-1 [link](sarif:/runs/1/results/0) [link2](sarif:/runs/1/results/0)'
+                                    },
+                                    'threadFlows': [
+                                        {
+                                            'message': {
+                                                'text': 'test message 5-2 [link](sarif:/runs/1/results/0)'
+                                            }
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ],
+                },
+                {
+                    'results': [
+                        {
+                            'codeFlows': [
+                                {
+                                    'message': {
+                                        'text': 'test message 6-1 [link](sarif:/runs/0/results/0)'
+                                    },
+                                    'threadFlows': [
+                                        {
+                                            'message': {
+                                                'text': 'test message 6-2 [link](sarif:/runs/0/results/0)'
+                                            }
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }
+
+        contents = [sarif1, sarif2, sarif3]
+
+        with libear.TemporaryDirectory() as tmpdir:
+            for idx, content in enumerate(contents):
+                file_name = os.path.join(tmpdir, 'results-{}.sarif'.format(idx))
+                with open(file_name, 'w') as handle:
+                    json.dump(content, handle)
+
+            sut.merge_sarif_files(tmpdir, sort_files=True)
+
+            self.assertIn('results-merged.sarif', os.listdir(tmpdir))
+            with open(os.path.join(tmpdir, 'results-merged.sarif')) as f:
+                merged = json.load(f)
+                self.assertEqual(len(merged['runs']), 6)
+
+                code_flows = [merged['runs'][x]['results'][0]['codeFlows'][0]['message']['text'] for x in range(6)]
+                thread_flows = [merged['runs'][x]['results'][0]['codeFlows'][0]['threadFlows'][0]['message']['text'] for x in range(6)]
+
+                # The run index should be updated for the second and third sets of runs
+                self.assertEqual(code_flows,
+                    [
+                        'test message 1-1 [link](sarif:/runs/1/results/0) [link2](sarif:/runs/1/results/0)',
+                        'test message 2-1 [link](sarif:/runs/0/results/0)',
+                        'test message 3-1 [link](sarif:/runs/3/results/0) [link2](sarif:/runs/3/results/0)',
+                        'test message 4-1 [link](sarif:/runs/2/results/0)',
+                        'test message 5-1 [link](sarif:/runs/5/results/0) [link2](sarif:/runs/5/results/0)',
+                        'test message 6-1 [link](sarif:/runs/4/results/0)'
+                    ])
+                self.assertEqual(thread_flows,
+                    [
+                        'test message 1-2 [link](sarif:/runs/1/results/0)',
+                        'test message 2-2 [link](sarif:/runs/0/results/0)',
+                        'test message 3-2 [link](sarif:/runs/3/results/0)',
+                        'test message 4-2 [link](sarif:/runs/2/results/0)',
+                        'test message 5-2 [link](sarif:/runs/5/results/0)',
+                        'test message 6-2 [link](sarif:/runs/4/results/0)'
+                    ])
+
+    def test_overflow_run_count(self):
+        sarif1 = {
+            'runs': [
+                {'results': [{
+                    'message': {'text': 'run 1-0 [link](sarif:/runs/1/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 1-1 [link](sarif:/runs/2/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 1-2 [link](sarif:/runs/3/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 1-3 [link](sarif:/runs/4/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 1-4 [link](sarif:/runs/5/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 1-5 [link](sarif:/runs/6/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 1-6 [link](sarif:/runs/7/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 1-7 [link](sarif:/runs/8/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 1-8 [link](sarif:/runs/9/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 1-9 [link](sarif:/runs/0/results/0)'}
+                }]}
+            ]
+        }
+        sarif2 = {
+            'runs': [
+                {'results': [{
+                    'message': {'text': 'run 2-0 [link](sarif:/runs/1/results/0) [link2](sarif:/runs/2/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 2-1 [link](sarif:/runs/2/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 2-2 [link](sarif:/runs/3/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 2-3 [link](sarif:/runs/4/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 2-4 [link](sarif:/runs/5/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 2-5 [link](sarif:/runs/6/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 2-6 [link](sarif:/runs/7/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 2-7 [link](sarif:/runs/8/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 2-8 [link](sarif:/runs/9/results/0)'}
+                }]},
+                {'results': [{
+                    'message': {'text': 'run 2-9 [link](sarif:/runs/0/results/0)'}
+                }]}
+            ]
+        }
+
+        contents = [sarif1, sarif2]
+        with libear.TemporaryDirectory() as tmpdir:
+            for idx, content in enumerate(contents):
+                file_name = os.path.join(tmpdir, 'results-{}.sarif'.format(idx))
+                with open(file_name, 'w') as handle:
+                    json.dump(content, handle)
+
+            sut.merge_sarif_files(tmpdir, sort_files=True)
+
+            self.assertIn('results-merged.sarif', os.listdir(tmpdir))
+            with open(os.path.join(tmpdir, 'results-merged.sarif')) as f:
+                merged = json.load(f)
+                self.assertEqual(len(merged['runs']), 20)
+
+                messages = [merged['runs'][x]['results'][0]['message']['text'] for x in range(20)]
+                self.assertEqual(messages,
+                    [
+                        'run 1-0 [link](sarif:/runs/1/results/0)',
+                        'run 1-1 [link](sarif:/runs/2/results/0)',
+                        'run 1-2 [link](sarif:/runs/3/results/0)',
+                        'run 1-3 [link](sarif:/runs/4/results/0)',
+                        'run 1-4 [link](sarif:/runs/5/results/0)',
+                        'run 1-5 [link](sarif:/runs/6/results/0)',
+                        'run 1-6 [link](sarif:/runs/7/results/0)',
+                        'run 1-7 [link](sarif:/runs/8/results/0)',
+                        'run 1-8 [link](sarif:/runs/9/results/0)',
+                        'run 1-9 [link](sarif:/runs/0/results/0)',
+                        'run 2-0 [link](sarif:/runs/11/results/0) [link2](sarif:/runs/12/results/0)',
+                        'run 2-1 [link](sarif:/runs/12/results/0)',
+                        'run 2-2 [link](sarif:/runs/13/results/0)',
+                        'run 2-3 [link](sarif:/runs/14/results/0)',
+                        'run 2-4 [link](sarif:/runs/15/results/0)',
+                        'run 2-5 [link](sarif:/runs/16/results/0)',
+                        'run 2-6 [link](sarif:/runs/17/results/0)',
+                        'run 2-7 [link](sarif:/runs/18/results/0)',
+                        'run 2-8 [link](sarif:/runs/19/results/0)',
+                        'run 2-9 [link](sarif:/runs/10/results/0)'
+                    ])