Index: tools/scan-build-py/libear/__init__.py =================================================================== --- tools/scan-build-py/libear/__init__.py +++ tools/scan-build-py/libear/__init__.py @@ -13,6 +13,7 @@ import shutil import contextlib import logging +import subprocess __all__ = ['build_libear'] @@ -62,7 +63,6 @@ def execute(cmd, *args, **kwargs): """ Make subprocess execution silent. """ - import subprocess kwargs.update({'stdout': subprocess.PIPE, 'stderr': subprocess.STDOUT}) return subprocess.check_call(cmd, *args, **kwargs) Index: tools/scan-build-py/libear/ear.c =================================================================== --- tools/scan-build-py/libear/ear.c +++ tools/scan-build-py/libear/ear.c @@ -28,6 +28,9 @@ #include #include #include +#include +#include +#include #include #if defined HAVE_POSIX_SPAWN || defined HAVE_POSIX_SPAWNP @@ -36,6 +39,7 @@ #if defined HAVE_NSGETENVIRON # include +static char **environ; #else extern char **environ; #endif @@ -65,12 +69,12 @@ typedef char const * bear_env_t[ENV_SIZE]; static int bear_capture_env_t(bear_env_t *env); -static int bear_reset_env_t(bear_env_t *env); static void bear_release_env_t(bear_env_t *env); static char const **bear_update_environment(char *const envp[], bear_env_t *env); static char const **bear_update_environ(char const **in, char const *key, char const *value); -static char **bear_get_environment(); -static void bear_report_call(char const *fun, char const *const argv[]); +static void bear_report_call(char const *const argv[]); +static int bear_write_json_report(int fd, char const *const cmd[], char const *cwd, pid_t pid); +static int bear_encode_json_string(char const *src, char *dst, size_t dst_size); static char const **bear_strings_build(char const *arg, va_list *ap); static char const **bear_strings_copy(char const **const in); static char const **bear_strings_append(char const **in, char const *e); @@ -141,6 +145,9 @@ static void on_load(void) { 
pthread_mutex_lock(&mutex); +#ifdef HAVE_NSGETENVIRON + environ = *_NSGetEnviron(); +#endif if (!initialized) initialized = bear_capture_env_t(&initial_env); pthread_mutex_unlock(&mutex); @@ -159,7 +166,7 @@ #ifdef HAVE_EXECVE int execve(const char *path, char *const argv[], char *const envp[]) { - bear_report_call(__func__, (char const *const *)argv); + bear_report_call((char const *const *)argv); return call_execve(path, argv, envp); } #endif @@ -169,36 +176,35 @@ #error can not implement execv without execve #endif int execv(const char *path, char *const argv[]) { - bear_report_call(__func__, (char const *const *)argv); - char * const * envp = bear_get_environment(); - return call_execve(path, argv, envp); + bear_report_call((char const *const *)argv); + return call_execve(path, argv, environ); } #endif #ifdef HAVE_EXECVPE int execvpe(const char *file, char *const argv[], char *const envp[]) { - bear_report_call(__func__, (char const *const *)argv); + bear_report_call((char const *const *)argv); return call_execvpe(file, argv, envp); } #endif #ifdef HAVE_EXECVP int execvp(const char *file, char *const argv[]) { - bear_report_call(__func__, (char const *const *)argv); + bear_report_call((char const *const *)argv); return call_execvp(file, argv); } #endif #ifdef HAVE_EXECVP2 int execvP(const char *file, const char *search_path, char *const argv[]) { - bear_report_call(__func__, (char const *const *)argv); + bear_report_call((char const *const *)argv); return call_execvP(file, search_path, argv); } #endif #ifdef HAVE_EXECT int exect(const char *path, char *const argv[], char *const envp[]) { - bear_report_call(__func__, (char const *const *)argv); + bear_report_call((char const *const *)argv); return call_exect(path, argv, envp); } #endif @@ -213,9 +219,8 @@ char const **argv = bear_strings_build(arg, &args); va_end(args); - bear_report_call(__func__, (char const *const *)argv); - char * const * envp = bear_get_environment(); - int const result = call_execve(path, 
(char *const *)argv, envp); + bear_report_call((char const *const *)argv); + int const result = call_execve(path, (char *const *)argv, environ); bear_strings_release(argv); return result; @@ -232,7 +237,7 @@ char const **argv = bear_strings_build(arg, &args); va_end(args); - bear_report_call(__func__, (char const *const *)argv); + bear_report_call((char const *const *)argv); int const result = call_execvp(file, (char *const *)argv); bear_strings_release(argv); @@ -252,7 +257,7 @@ char const **envp = va_arg(args, char const **); va_end(args); - bear_report_call(__func__, (char const *const *)argv); + bear_report_call((char const *const *)argv); int const result = call_execve(path, (char *const *)argv, (char *const *)envp); @@ -266,7 +271,7 @@ const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *restrict attrp, char *const argv[restrict], char *const envp[restrict]) { - bear_report_call(__func__, (char const *const *)argv); + bear_report_call((char const *const *)argv); return call_posix_spawn(pid, path, file_actions, attrp, argv, envp); } #endif @@ -276,7 +281,7 @@ const posix_spawn_file_actions_t *file_actions, const posix_spawnattr_t *restrict attrp, char *const argv[restrict], char *const envp[restrict]) { - bear_report_call(__func__, (char const *const *)argv); + bear_report_call((char const *const *)argv); return call_posix_spawnp(pid, file, file_actions, attrp, argv, envp); } #endif @@ -318,12 +323,12 @@ DLSYM(func, fp, "execvp"); - bear_env_t current_env; - bear_capture_env_t(&current_env); - bear_reset_env_t(&initial_env); + char **const original = environ; + char const **const modified = bear_update_environment(original, &initial_env); + environ = (char **)modified; int const result = (*fp)(file, argv); - bear_reset_env_t(&current_env); - bear_release_env_t(&current_env); + environ = original; + bear_strings_release(modified); return result; } @@ -336,12 +341,12 @@ DLSYM(func, fp, "execvP"); - bear_env_t current_env; - bear_capture_env_t(&current_env); -
bear_reset_env_t(&initial_env); + char **const original = environ; + char const **const modified = bear_update_environment(original, &initial_env); + environ = (char **)modified; int const result = (*fp)(file, search_path, argv); - bear_reset_env_t(&current_env); - bear_release_env_t(&current_env); + environ = original; + bear_strings_release(modified); return result; } @@ -405,11 +410,7 @@ /* this method is to write log about the process creation. */ -static void bear_report_call(char const *fun, char const *const argv[]) { - static int const GS = 0x1d; - static int const RS = 0x1e; - static int const US = 0x1f; - +static void bear_report_call(char const *const argv[]) { if (!initialized) return; @@ -422,32 +423,102 @@ char const * const out_dir = initial_env[0]; size_t const path_max_length = strlen(out_dir) + 32; char filename[path_max_length]; - if (-1 == snprintf(filename, path_max_length, "%s/%d.cmd", out_dir, getpid())) { + if (-1 == snprintf(filename, path_max_length, "%s/execution.XXXXXX", out_dir)) { perror("bear: snprintf"); exit(EXIT_FAILURE); } - FILE * fd = fopen(filename, "a+"); - if (0 == fd) { - perror("bear: fopen"); + int fd = mkstemp((char *)&filename); + if (-1 == fd) { + perror("bear: mkstemp"); exit(EXIT_FAILURE); } - fprintf(fd, "%d%c", getpid(), RS); - fprintf(fd, "%d%c", getppid(), RS); - fprintf(fd, "%s%c", fun, RS); - fprintf(fd, "%s%c", cwd, RS); - size_t const argc = bear_strings_length(argv); - for (size_t it = 0; it < argc; ++it) { - fprintf(fd, "%s%c", argv[it], US); + if (0 > bear_write_json_report(fd, argv, cwd, getpid())) { + perror("bear: writing json problem"); + exit(EXIT_FAILURE); } - fprintf(fd, "%c", GS); - if (fclose(fd)) { - perror("bear: fclose"); + if (close(fd)) { + perror("bear: close"); exit(EXIT_FAILURE); } free((void *)cwd); pthread_mutex_unlock(&mutex); } +static int bear_write_json_report(int fd, char const *const cmd[], char const *const cwd, pid_t pid) { + if (0 > dprintf(fd, "{ \"pid\": %d, \"cmd\": [", pid)) + return -1;
+ + for (char const *const *it = cmd; (it) && (*it); ++it) { + char const *const sep = (it != cmd) ? "," : ""; + const size_t buffer_size = 2 * strlen(*it); + char buffer[buffer_size]; + if (-1 == bear_encode_json_string(*it, buffer, buffer_size)) + return -1; + if (0 > dprintf(fd, "%s \"%s\"", sep, buffer)) + return -1; + } + const size_t buffer_size = 2 * strlen(cwd); + char buffer[buffer_size]; + if (-1 == bear_encode_json_string(cwd, buffer, buffer_size)) + return -1; + if (0 > dprintf(fd, "], \"cwd\": \"%s\" }", buffer)) + return -1; + + return 0; +} + +static int bear_encode_json_string(char const *const src, char *const dst, size_t const dst_size) { + char const *src_it = src; + char const *const src_end = src + strlen(src); + + char *dst_it = dst; + char *const dst_end = dst + dst_size; + + for (; src_it != src_end; ++src_it, ++dst_it) { + if (dst_it == dst_end) + return -1; + // Insert an escape character before control characters. + switch (*src_it) { + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + case '"': + case '\\': + *dst_it++ = '\\'; + break; + default: + break; + } + // Transform some of the control characters. + switch (*src_it) { + case '\b': + *dst_it = 'b'; + break; + case '\f': + *dst_it = 'f'; + break; + case '\n': + *dst_it = 'n'; + break; + case '\r': + *dst_it = 'r'; + break; + case '\t': + *dst_it = 't'; + break; + default: + *dst_it = *src_it; + } + } + if (dst_it == dst_end) + return -1; + // Insert a terminating 0 value. 
+ *dst_it = 0; + return 0; +} + /* update environment assure that chilren processes will copy the desired * behaviour */ @@ -462,18 +533,6 @@ return status; } -static int bear_reset_env_t(bear_env_t *env) { - int status = 1; - for (size_t it = 0; it < ENV_SIZE; ++it) { - if ((*env)[it]) { - setenv(env_names[it], (*env)[it], 1); - } else { - unsetenv(env_names[it]); - } - } - return status; -} - static void bear_release_env_t(bear_env_t *env) { for (size_t it = 0; it < ENV_SIZE; ++it) { free((void *)(*env)[it]); @@ -518,14 +577,6 @@ return bear_strings_append(envs, env); } -static char **bear_get_environment() { -#if defined HAVE_NSGETENVIRON - return *_NSGetEnviron(); -#else - return environ; -#endif -} - /* util methods to deal with string arrays. environment and process arguments * are both represented as string arrays. */ Index: tools/scan-build-py/libscanbuild/intercept.py =================================================================== --- tools/scan-build-py/libscanbuild/intercept.py +++ tools/scan-build-py/libscanbuild/intercept.py @@ -26,11 +26,11 @@ import re import itertools import json -import glob import logging +import uuid from libear import build_libear, TemporaryDirectory from libscanbuild import command_entry_point, compiler_wrapper, \ - wrapper_environment, run_command, run_build + wrapper_environment, run_command, run_build, Execution from libscanbuild import duplicate_check from libscanbuild.compilation import split_command from libscanbuild.arguments import parse_args_for_intercept_build @@ -38,13 +38,9 @@ __all__ = ['capture', 'intercept_build', 'intercept_compiler_wrapper'] -GS = chr(0x1d) -RS = chr(0x1e) -US = chr(0x1f) - COMPILER_WRAPPER_CC = 'intercept-cc' COMPILER_WRAPPER_CXX = 'intercept-c++' -TRACE_FILE_EXTENSION = '.cmd' # same as in ear.c +TRACE_FILE_PREFIX = 'execution.' 
# same as in ear.c WRAPPER_ONLY_PLATFORMS = frozenset({'win32', 'cygwin'}) @@ -89,11 +85,9 @@ environment = setup_environment(args, tmp_dir) exit_code = run_build(args.build, env=environment) # read the intercepted exec calls - exec_traces = itertools.chain.from_iterable( - parse_exec_trace(os.path.join(tmp_dir, filename)) - for filename in sorted(glob.iglob(os.path.join(tmp_dir, '*.cmd')))) + calls = (parse_exec_trace(file) for file in exec_trace_files(tmp_dir)) # do post processing - entries = post_processing(exec_traces) + entries = post_processing(calls) # dump the compilation database with open(args.cdb, 'w+') as handle: json.dump(list(entries), handle, sort_keys=True, indent=4) @@ -157,7 +151,7 @@ return # write current execution info to the pid file try: - target_file_name = str(os.getpid()) + TRACE_FILE_EXTENSION + target_file_name = TRACE_FILE_PREFIX + str(uuid.uuid4()) target_file = os.path.join(target_dir, target_file_name) logging.debug('writing execution report to: %s', target_file) write_exec_trace(target_file, execution) @@ -174,35 +168,42 @@ :param filename: path to the output execution trace file, :param entry: the Execution object to append to that file. """ - with open(filename, 'ab') as handler: - pid = str(entry.pid) - command = US.join(entry.cmd) + US - content = RS.join([pid, pid, 'wrapper', entry.cwd, command]) + GS - handler.write(content.encode('utf-8')) + call = {'pid': entry.pid, 'cwd': entry.cwd, 'cmd': entry.cmd} + with open(filename, 'w') as handler: + json.dump(call, handler) def parse_exec_trace(filename): - """ Parse the file generated by the 'libear' preloaded library. + """ Parse execution report file. Given filename points to a file which contains the basic report - generated by the interception library or wrapper command. A single - report file _might_ contain multiple process creation info. """ + generated by the interception library or compiler wrapper. 
+ + :param filename: path to an execution trace file to read from, + :return: an Execution object. """ logging.debug('parse exec trace file: %s', filename) with open(filename, 'r') as handler: - content = handler.read() - for group in filter(bool, content.split(GS)): - records = group.split(RS) - yield { - 'pid': records[0], - 'ppid': records[1], - 'function': records[2], - 'directory': records[3], - 'command': records[4].split(US)[:-1] - } + entry = json.load(handler) + return Execution( + pid=entry['pid'], + cwd=entry['cwd'], + cmd=entry['cmd']) + + +def exec_trace_files(directory): + """ Generates exec trace file names. + + :param directory: path to directory which contains the trace files. + :return: a generator of file names (absolute path). """ + + for root, _, files in os.walk(directory): + for candidate in files: + if candidate.startswith(TRACE_FILE_PREFIX): + yield os.path.join(root, candidate) -def format_entry(exec_trace): +def format_entry(execution): """ Generate the desired fields for compilation database entries. 
""" def abspath(cwd, name): @@ -210,17 +211,17 @@ fullname = name if os.path.isabs(name) else os.path.join(cwd, name) return os.path.normpath(fullname) - logging.debug('format this command: %s', exec_trace['command']) - compilation = split_command(exec_trace['command']) + logging.debug('format this command: %s', execution.cmd) + compilation = split_command(execution.cmd) if compilation: for source in compilation.files: compiler = 'c++' if compilation.compiler == 'c++' else 'cc' command = [compiler, '-c'] + compilation.flags + [source] logging.debug('formated as: %s', command) yield { - 'directory': exec_trace['directory'], + 'directory': execution.cwd, 'command': encode(command), - 'file': abspath(exec_trace['directory'], source) + 'file': abspath(execution.cwd, source) } Index: tools/scan-build-py/tests/unit/test_intercept.py =================================================================== --- tools/scan-build-py/tests/unit/test_intercept.py +++ tools/scan-build-py/tests/unit/test_intercept.py @@ -6,6 +6,7 @@ import libear import libscanbuild.intercept as sut +from libscanbuild import Execution import unittest import os.path @@ -14,7 +15,7 @@ def test_format_entry_filters_action(self): def test(command): - trace = {'command': command, 'directory': '/opt/src/project'} + trace = Execution(cmd=command, cwd='/opt/src/project', pid=0) return list(sut.format_entry(trace)) self.assertTrue(test(['cc', '-c', 'file.c', '-o', 'file.o'])) @@ -27,7 +28,7 @@ current = os.path.join(parent, 'project') def test(filename): - trace = {'directory': current, 'command': ['cc', '-c', filename]} + trace = Execution(cmd=['cc', '-c', filename], cwd=current, pid=0) return list(sut.format_entry(trace))[0]['file'] self.assertEqual(os.path.join(current, 'file.c'), test('file.c'))