Index: tools/scan-build-py/libscanbuild/compilation.py =================================================================== --- tools/scan-build-py/libscanbuild/compilation.py +++ tools/scan-build-py/libscanbuild/compilation.py @@ -3,16 +3,20 @@ # # This file is distributed under the University of Illinois Open Source # License. See LICENSE.TXT for details. -""" This module is responsible for to parse a compiler invocation. """ +""" This module is responsible for parsing a compiler invocation. """ import re import os import collections +import logging +import json +from libscanbuild import Execution +from libscanbuild.shell import decode __all__ = ['split_command', 'classify_source', 'compiler_language'] # Ignored compiler options map for compilation database creation. -# The map is used in `split_command` method. (Which does ignore and classify +# The map is used in `_split_command` method. (Which does ignore and classify # parameters.) Please note, that these are not the only parameters which # might be ignored. 
# @@ -48,61 +52,205 @@ '-Xlinker': 1 } -# Known C/C++ compiler executable name patterns -COMPILER_PATTERNS = frozenset([ - re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'), - re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'), - re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'), - re.compile(r'^llvm-g(cc|\+\+)$'), +# Known C/C++ compiler wrapper name patterns +COMPILER_PATTERN_WRAPPER = re.compile(r'^(distcc|ccache)$') + +# Known C compiler executable name patterns +COMPILER_PATTERNS_CC = frozenset([ + re.compile(r'^(|i|mpi)cc$'), + re.compile(r'^([^-]*-)*[mg]cc(-\d+(\.\d+){0,2})?$'), + re.compile(r'^([^-]*-)*clang(-\d+(\.\d+){0,2})?$'), + re.compile(r'^(g|)xlc$'), ]) +# Known C++ compiler executable name patterns +COMPILER_PATTERNS_CXX = frozenset([ + re.compile(r'^(c\+\+|cxx|CC)$'), + re.compile(r'^([^-]*-)*[mg]\+\+(-\d+(\.\d+){0,2})?$'), + re.compile(r'^([^-]*-)*clang\+\+(-\d+(\.\d+){0,2})?$'), + re.compile(r'^(icpc|mpiCC|mpicxx|mpic\+\+)$'), + re.compile(r'^(g|)xl(C|c\+\+)$'), +]) -def split_command(command): - """ Returns a value when the command is a compilation, None otherwise. +CompilationCommand = collections.namedtuple( + 'CompilationCommand', ['compiler', 'flags', 'files']) - The value on success is a named tuple with the following attributes: - files: list of source files - flags: list of compile options - compiler: string value of 'c' or 'c++' """ +class Compilation: + def __init__(self, compiler, flags, source, directory): + """ Constructor for a single compilation. - # the result of this method - result = collections.namedtuple('Compilation', - ['compiler', 'flags', 'files']) - result.compiler = compiler_language(command) - result.flags = [] - result.files = [] - # quit right now, if the program was not a C/C++ compiler - if not result.compiler: + This method just normalizes the paths and initializes values. 
""" + + self.compiler = compiler + self.flags = flags + self.directory = os.path.normpath(directory) + self.source = source if os.path.isabs(source) else \ + os.path.normpath(os.path.join(self.directory, source)) + + def __hash__(self): + return hash((self.compiler, self.source, self.directory, + ':'.join(self.flags))) + + def __eq__(self, other): + return vars(self) == vars(other) + + def as_dict(self): + """ This method dumps the object attributes into a dictionary. """ + + return vars(self) + + def as_db_entry(self): + """ This method creates a compilation database entry. """ + + relative = os.path.relpath(self.source, self.directory) + compiler = 'cc' if self.compiler == 'c' else 'c++' + return { + 'file': relative, + 'arguments': [compiler, '-c'] + self.flags + [relative], + 'directory': self.directory + } + + @staticmethod + def from_db_entry(entry): + """ Parser method for compilation entry. + + From compilation database entry it creates the compilation object. + + :param entry: the compilation database entry + :return: a single compilation object """ + + command = decode(entry['command']) if 'command' in entry else \ + entry['arguments'] + execution = Execution(cmd=command, cwd=entry['directory'], pid=0) + entries = list(Compilation.iter_from_execution(execution)) + assert len(entries) == 1 + return entries[0] + + @staticmethod + def iter_from_execution(execution, cc='cc', cxx='c++'): + """ Generator method for compilation entries. + + From a single compiler call it can generate zero or more entries. 
+ + :param execution: executed command and working directory + :param cc: user specified C compiler name + :param cxx: user specified C++ compiler name + :return: stream of CompilationDbEntry objects """ + + candidate = Compilation._split_command(execution.cmd, cc, cxx) + for source in (candidate.files if candidate else []): + result = Compilation(directory=execution.cwd, + source=source, + compiler=candidate.compiler, + flags=candidate.flags) + if os.path.isfile(result.source): + yield result + + @staticmethod + def _split_compiler(command, cc, cxx): + """ A predicate to decide the command is a compiler call or not. + + :param command: the command to classify + :param cc: user specified C compiler name + :param cxx: user specified C++ compiler name + :return: None if the command is not a compilation, or a tuple + (compiler_language, rest of the command) otherwise """ + + def is_wrapper(cmd): + return True if COMPILER_PATTERN_WRAPPER.match(cmd) else False + + def is_c_compiler(cmd): + return os.path.basename(cc) == cmd or \ + any(pattern.match(cmd) for pattern in COMPILER_PATTERNS_CC) + + def is_cxx_compiler(cmd): + return os.path.basename(cxx) == cmd or \ + any(pattern.match(cmd) for pattern in COMPILER_PATTERNS_CXX) + + if command: # not empty list will allow to index '0' and '1:' + executable = os.path.basename(command[0]) + parameters = command[1:] + # 'wrapper' 'parameters' and + # 'wrapper' 'compiler' 'parameters' are valid. + # plus, a wrapper can wrap wrapper too. + if is_wrapper(executable): + result = Compilation._split_compiler(parameters, cc, cxx) + return ('c', parameters) if result is None else result + # and 'compiler' 'parameters' is valid. 
+ elif is_c_compiler(executable): + return 'c', parameters + elif is_cxx_compiler(executable): + return 'c++', parameters return None - # iterate on the compile options - args = iter(command[1:]) - for arg in args: - # quit when compilation pass is not involved - if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}: + + @staticmethod + def _split_command(command, cc, cxx): + """ Returns a value when the command is a compilation, None otherwise. + + :param command: the command to classify + :param cc: user specified C compiler name + :param cxx: user specified C++ compiler name + :return: stream of CompilationCommand objects """ + + logging.debug('input was: %s', command) + # quit right now, if the program was not a C/C++ compiler + compiler_and_arguments = Compilation._split_compiler(command, cc, cxx) + if compiler_and_arguments is None: return None - # ignore some flags - elif arg in IGNORED_FLAGS: - count = IGNORED_FLAGS[arg] - for _ in range(count): - next(args) - elif re.match(r'^-(l|L|Wl,).+', arg): - pass - # some parameters could look like filename, take as compile option - elif arg in {'-D', '-I'}: - result.flags.extend([arg, next(args)]) - # parameter which looks source file is taken... - elif re.match(r'^[^-].+', arg) and classify_source(arg): - result.files.append(arg) - # and consider everything else as compile option. 
- else: - result.flags.append(arg) - # do extra check on number of source files - return result if result.files else None + + # the result of this method + result = CompilationCommand(compiler=compiler_and_arguments[0], + flags=[], + files=[]) + # iterate on the compile options + args = iter(compiler_and_arguments[1]) + for arg in args: + # quit when compilation pass is not involved + if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}: + return None + # ignore some flags + elif arg in IGNORED_FLAGS: + count = IGNORED_FLAGS[arg] + for _ in range(count): + next(args) + elif re.match(r'^-(l|L|Wl,).+', arg): + pass + # some parameters could look like filename, take as compile option + elif arg in {'-D', '-I'}: + result.flags.extend([arg, next(args)]) + # parameter which looks source file is taken... + elif re.match(r'^[^-].+', arg) and classify_source(arg): + result.files.append(arg) + # and consider everything else as compile option. + else: + result.flags.append(arg) + logging.debug('output is: %s', result) + # do extra check on number of source files + return result if result.files else None + + +class CompilationDatabase: + @staticmethod + def save(filename, iterator): + entries = [entry.as_db_entry() for entry in iterator] + with open(filename, 'w+') as handle: + json.dump(entries, handle, sort_keys=True, indent=4) + + @staticmethod + def load(filename): + with open(filename, 'r') as handle: + for entry in json.load(handle): + yield Compilation.from_db_entry(entry) def classify_source(filename, c_compiler=True): - """ Return the language from file name extension. """ + """ Classifies source file names and returns the presumed language, + based on the file name extension. + + :param filename: the source file name + :param c_compiler: indicate that the compiler is a C compiler, + :return: the language from file name extension. 
""" mapping = { '.c': 'c' if c_compiler else 'c++', @@ -127,15 +275,28 @@ return mapping.get(extension) +# Below this line, only temporary declarations for backward compatibility. # + +def split_command(command): + """ Returns a value when the command is a compilation, None otherwise. + + The value on success is a named tuple with the following attributes: + + files: list of source files + flags: list of compile options + compiler: string value of 'c' or 'c++' """ + + # the result of this method + return Compilation._split_command(command, "cc", "c++") + + def compiler_language(command): """ A predicate to decide the command is a compiler call or not. Returns 'c' or 'c++' when it match. None otherwise. """ - cplusplus = re.compile(r'^(.+)(\+\+)(-.+|)$') + language_and_arguments = Compilation._split_compiler(command, "cc", "c++") + if language_and_arguments is None: + return None - if command: - executable = os.path.basename(command[0]) - if any(pattern.match(executable) for pattern in COMPILER_PATTERNS): - return 'c++' if cplusplus.match(executable) else 'c' - return None + return language_and_arguments[0] Index: tools/scan-build-py/tests/unit/test_compilation.py =================================================================== --- tools/scan-build-py/tests/unit/test_compilation.py +++ tools/scan-build-py/tests/unit/test_compilation.py @@ -10,88 +10,156 @@ class CompilerTest(unittest.TestCase): - def test_is_compiler_call(self): - self.assertIsNotNone(sut.compiler_language(['clang'])) - self.assertIsNotNone(sut.compiler_language(['clang-3.6'])) - self.assertIsNotNone(sut.compiler_language(['clang++'])) - self.assertIsNotNone(sut.compiler_language(['clang++-3.5.1'])) - self.assertIsNotNone(sut.compiler_language(['cc'])) - self.assertIsNotNone(sut.compiler_language(['c++'])) - self.assertIsNotNone(sut.compiler_language(['gcc'])) - self.assertIsNotNone(sut.compiler_language(['g++'])) - self.assertIsNotNone(sut.compiler_language(['/usr/local/bin/gcc'])) - 
self.assertIsNotNone(sut.compiler_language(['/usr/local/bin/g++'])) - self.assertIsNotNone(sut.compiler_language(['/usr/local/bin/clang'])) - self.assertIsNotNone( - sut.compiler_language(['armv7_neno-linux-gnueabi-g++'])) - - self.assertIsNone(sut.compiler_language([])) - self.assertIsNone(sut.compiler_language([''])) - self.assertIsNone(sut.compiler_language(['ld'])) - self.assertIsNone(sut.compiler_language(['as'])) - self.assertIsNone(sut.compiler_language(['/usr/local/bin/compiler'])) + def assert_c_compiler(self, command, cc='nope', cxx='nope++'): + value = sut.Compilation._split_compiler(command, cc, cxx) + self.assertIsNotNone(value) + self.assertEqual(value[0], 'c') + + def assert_cxx_compiler(self, command, cc='nope', cxx='nope++'): + value = sut.Compilation._split_compiler(command, cc, cxx) + self.assertIsNotNone(value) + self.assertEqual(value[0], 'c++') + + def assert_not_compiler(self, command): + value = sut.Compilation._split_compiler(command, 'nope', 'nope') + self.assertIsNone(value) + + def test_compiler_call(self): + self.assert_c_compiler(['cc']) + self.assert_cxx_compiler(['CC']) + self.assert_cxx_compiler(['c++']) + self.assert_cxx_compiler(['cxx']) + + def test_clang_compiler_call(self): + self.assert_c_compiler(['clang']) + self.assert_c_compiler(['clang-3.6']) + self.assert_cxx_compiler(['clang++']) + self.assert_cxx_compiler(['clang++-3.5.1']) + + def test_gcc_compiler_call(self): + self.assert_c_compiler(['gcc']) + self.assert_cxx_compiler(['g++']) + + def test_intel_compiler_call(self): + self.assert_c_compiler(['icc']) + self.assert_cxx_compiler(['icpc']) + + def test_aix_compiler_call(self): + self.assert_c_compiler(['xlc']) + self.assert_cxx_compiler(['xlc++']) + self.assert_cxx_compiler(['xlC']) + self.assert_c_compiler(['gxlc']) + self.assert_cxx_compiler(['gxlc++']) + + def test_open_mpi_compiler_call(self): + self.assert_c_compiler(['mpicc']) + self.assert_cxx_compiler(['mpiCC']) + self.assert_cxx_compiler(['mpicxx']) + 
self.assert_cxx_compiler(['mpic++']) + + def test_compiler_call_with_path(self): + self.assert_c_compiler(['/usr/local/bin/gcc']) + self.assert_cxx_compiler(['/usr/local/bin/g++']) + self.assert_c_compiler(['/usr/local/bin/clang']) + + def test_cross_compiler_call(self): + self.assert_cxx_compiler(['armv7_neno-linux-gnueabi-g++']) + + def test_compiler_wrapper_call(self): + self.assert_c_compiler(['distcc']) + self.assert_c_compiler(['distcc', 'cc']) + self.assert_cxx_compiler(['distcc', 'c++']) + self.assert_c_compiler(['ccache']) + self.assert_c_compiler(['ccache', 'cc']) + self.assert_cxx_compiler(['ccache', 'c++']) + + def test_non_compiler_call(self): + self.assert_not_compiler([]) + self.assert_not_compiler(['']) + self.assert_not_compiler(['ld']) + self.assert_not_compiler(['as']) + self.assert_not_compiler(['/usr/local/bin/compiler']) + + def test_specific_compiler_call(self): + self.assert_c_compiler(['nope'], cc='nope') + self.assert_c_compiler(['./nope'], cc='nope') + self.assert_c_compiler(['/path/nope'], cc='nope') + self.assert_cxx_compiler(['nope++'], cxx='nope++') + self.assert_cxx_compiler(['./nope++'], cxx='nope++') + self.assert_cxx_compiler(['/path/nope++'], cxx='nope++') + + def assert_arguments_equal(self, expected, command): + value = sut.Compilation._split_compiler(command, 'nope', 'nope') + self.assertIsNotNone(value) + self.assertEqual(expected, value[1]) + + def test_argument_split(self): + arguments = ['-c', 'file.c'] + self.assert_arguments_equal(arguments, ['distcc'] + arguments) + self.assert_arguments_equal(arguments, ['distcc', 'cc'] + arguments) + self.assert_arguments_equal(arguments, ['distcc', 'c++'] + arguments) + self.assert_arguments_equal(arguments, ['ccache'] + arguments) + self.assert_arguments_equal(arguments, ['ccache', 'cc'] + arguments) + self.assert_arguments_equal(arguments, ['ccache', 'c++'] + arguments) class SplitTest(unittest.TestCase): - def test_detect_cxx_from_compiler_name(self): - def test(cmd): - result = 
sut.split_command([cmd, '-c', 'src.c']) - self.assertIsNotNone(result, "wrong input for test") - return result.compiler == 'c++' + def assert_compilation(self, command): + result = sut.Compilation._split_command(command, 'nope', 'nope') + self.assertIsNotNone(result) - self.assertFalse(test('cc')) - self.assertFalse(test('gcc')) - self.assertFalse(test('clang')) - - self.assertTrue(test('c++')) - self.assertTrue(test('g++')) - self.assertTrue(test('g++-5.3.1')) - self.assertTrue(test('clang++')) - self.assertTrue(test('clang++-3.7.1')) - self.assertTrue(test('armv7_neno-linux-gnueabi-g++')) + def assert_non_compilation(self, command): + result = sut.Compilation._split_command(command, 'nope', 'nope') + self.assertIsNone(result) def test_action(self): - self.assertIsNotNone(sut.split_command(['clang', 'source.c'])) - self.assertIsNotNone(sut.split_command(['clang', '-c', 'source.c'])) - self.assertIsNotNone(sut.split_command(['clang', '-c', 'source.c', - '-MF', 'a.d'])) + self.assert_compilation(['clang', 'source.c']) + self.assert_compilation(['clang', '-c', 'source.c']) + self.assert_compilation(['clang', '-c', 'source.c', '-MF', 'a.d']) + + self.assert_non_compilation(['clang', '-E', 'source.c']) + self.assert_non_compilation(['clang', '-c', '-E', 'source.c']) + self.assert_non_compilation(['clang', '-c', '-M', 'source.c']) + self.assert_non_compilation(['clang', '-c', '-MM', 'source.c']) - self.assertIsNone(sut.split_command(['clang', '-E', 'source.c'])) - self.assertIsNone(sut.split_command(['clang', '-c', '-E', 'source.c'])) - self.assertIsNone(sut.split_command(['clang', '-c', '-M', 'source.c'])) - self.assertIsNone( - sut.split_command(['clang', '-c', '-MM', 'source.c'])) + def assert_source_files(self, expected, command): + result = sut.Compilation._split_command(command, 'nope', 'nope') + self.assertIsNotNone(result) + self.assertEqual(expected, result.files) def test_source_file(self): - def test(expected, cmd): - self.assertEqual(expected, 
sut.split_command(cmd).files) - - test(['src.c'], ['clang', 'src.c']) - test(['src.c'], ['clang', '-c', 'src.c']) - test(['src.C'], ['clang', '-x', 'c', 'src.C']) - test(['src.cpp'], ['clang++', '-c', 'src.cpp']) - test(['s1.c', 's2.c'], ['clang', '-c', 's1.c', 's2.c']) - test(['s1.c', 's2.c'], ['cc', 's1.c', 's2.c', '-ldep', '-o', 'a.out']) - test(['src.c'], ['clang', '-c', '-I', './include', 'src.c']) - test(['src.c'], ['clang', '-c', '-I', '/opt/me/include', 'src.c']) - test(['src.c'], ['clang', '-c', '-D', 'config=file.c', 'src.c']) - - self.assertIsNone( - sut.split_command(['cc', 'this.o', 'that.o', '-o', 'a.out'])) - self.assertIsNone( - sut.split_command(['cc', 'this.o', '-lthat', '-o', 'a.out'])) + self.assert_source_files(['src.c'], ['clang', 'src.c']) + self.assert_source_files(['src.c'], ['clang', '-c', 'src.c']) + self.assert_source_files(['src.C'], ['clang', '-x', 'c', 'src.C']) + self.assert_source_files(['src.cpp'], ['clang++', '-c', 'src.cpp']) + self.assert_source_files(['s1.c', 's2.c'], + ['clang', '-c', 's1.c', 's2.c']) + self.assert_source_files(['s1.c', 's2.c'], + ['cc', 's1.c', 's2.c', '-ldp', '-o', 'a.out']) + self.assert_source_files(['src.c'], + ['clang', '-c', '-I', './include', 'src.c']) + self.assert_source_files(['src.c'], + ['clang', '-c', '-I', '/opt/inc', 'src.c']) + self.assert_source_files(['src.c'], + ['clang', '-c', '-Dconfig=file.c', 'src.c']) + + self.assert_non_compilation(['cc', 'this.o', 'that.o', '-o', 'a.out']) + self.assert_non_compilation(['cc', 'this.o', '-lthat', '-o', 'a.out']) + + def assert_flags(self, expected, flags): + command = ['clang', '-c', 'src.c'] + flags + result = sut.Compilation._split_command(command, 'nope', 'nope') + self.assertIsNotNone(result) + self.assertEqual(expected, result.flags) def test_filter_flags(self): - def test(expected, flags): - command = ['clang', '-c', 'src.c'] + flags - self.assertEqual(expected, sut.split_command(command).flags) def same(expected): - test(expected, expected) + 
self.assert_flags(expected, expected) def filtered(flags): - test([], flags) + self.assert_flags([], flags) same([]) same(['-I', '/opt/me/include', '-DNDEBUG', '-ULIMITS']) @@ -108,15 +176,36 @@ class SourceClassifierTest(unittest.TestCase): + def assert_non_source(self, filename): + result = sut.classify_source(filename) + self.assertIsNone(result) + + def assert_c_source(self, filename, force): + result = sut.classify_source(filename, force) + self.assertEqual('c', result) + + def assert_cxx_source(self, filename, force): + result = sut.classify_source(filename, force) + self.assertEqual('c++', result) + def test_sources(self): - self.assertIsNone(sut.classify_source('file.o')) - self.assertIsNone(sut.classify_source('file.exe')) - self.assertIsNone(sut.classify_source('/path/file.o')) - self.assertIsNone(sut.classify_source('clang')) - - self.assertEqual('c', sut.classify_source('file.c')) - self.assertEqual('c', sut.classify_source('./file.c')) - self.assertEqual('c', sut.classify_source('/path/file.c')) - self.assertEqual('c++', sut.classify_source('file.c', False)) - self.assertEqual('c++', sut.classify_source('./file.c', False)) - self.assertEqual('c++', sut.classify_source('/path/file.c', False)) + self.assert_non_source('file.o') + self.assert_non_source('file.exe') + self.assert_non_source('/path/file.o') + self.assert_non_source('clang') + + self.assert_c_source('file.c', True) + self.assert_cxx_source('file.c', False) + + self.assert_cxx_source('file.cxx', True) + self.assert_cxx_source('file.cxx', False) + self.assert_cxx_source('file.c++', True) + self.assert_cxx_source('file.c++', False) + self.assert_cxx_source('file.cpp', True) + self.assert_cxx_source('file.cpp', False) + + self.assert_c_source('/path/file.c', True) + self.assert_c_source('./path/file.c', True) + self.assert_c_source('../path/file.c', True) + self.assert_c_source('/file.c', True) + self.assert_c_source('./file.c', True)