Index: llvm/utils/remove_redundant.py
===================================================================
--- /dev/null
+++ llvm/utils/remove_redundant.py
@@ -0,0 +1,264 @@
+import sys
+import json
+import re
+import subprocess
+import os
+import time
+import datetime
+
+# Configuration: source repo(s), build directory containing the
+# compile_commands.json database, and the record of already-processed files.
+repo_folders = ['/Users/mvzolotu/devel/tempRepo/src']
+build_folder = '/Users/mvzolotu/devel/tempRepo/build/'
+compile_db_file = build_folder + 'compile_commands.json'
+processed_files = '/tmp/processed_clang.json'
+files_list_name = ''  # e.g. '/Users/mvzolotu/devel/RedundantIncludes/llvm/tools/clang/files_list.json'
+do_tests = False
+tests_command = 'ninja && ninja check'
+
+class bcolors:
+    HEADER = '\033[95m'
+    OKBLUE = '\033[94m'
+    OKGREEN = '\033[92m'
+    WARNING = '\033[93m'
+    FAIL = '\033[91m'
+    ENDC = '\033[0m'
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+
+
+def load_data(fname):
+    data = {}
+    with open(fname) as f:
+        data = json.loads(f.read())
+    return data
+
+def save_data(data, fname):
+    with open(fname, 'w') as f:
+        f.write(json.dumps(data, sort_keys=True, indent=2))
+
+def get_repo_from_name(fname):
+    for repo_folder in repo_folders:
+        if fname.startswith(repo_folder):
+            return repo_folder
+    return ''
+
+def get_name(fname):
+    fname = fname.replace(get_repo_from_name(fname), '')
+    return fname
+
+def commit(f):
+    # Show the remaining diff and commit the include removals for this file.
+    fname = f['file']
+    os.chdir(get_repo_from_name(fname))
+    subprocess.call('git --no-pager diff -U0', shell=True)
+    subprocess.call('git add %s >& /dev/null' % fname, shell=True)
+    subprocess.call('git commit -m "Speedup %s compilation." >& /dev/null' % get_name(fname), shell=True)
+    return
+
+def get_includes(fname):
+    # Collect the #include directives of a file, skipping .def/.inc includes,
+    # which are never safe to drop automatically.
+    includes = []
+    with open(fname) as f:
+        for l in f.read().split('\n'):
+            m = re.match(r'#include\s+(\S+)', l)
+            if m:
+                header = m.group(1)
+                if header.endswith('.def"') or header.endswith('.inc"'):
+                    continue
+                includes.append(header)
+    return includes
+
+def can_remove(redundant, f):
+    # Rewrite the file without the given includes and check that it still
+    # compiles with its original command from the compilation database.
+    fname = f['file']
+    lines = []
+    with open(fname) as fr:
+        for l in fr.read().split('\n'):
+            m = re.match(r'#include\s+(\S+)', l)
+            if m:
+                header = m.group(1)
+                if header in redundant:
+                    continue
+            lines.append(l)
+    with open(fname, 'w') as fw:
+        fw.write('\n'.join(lines))
+    cmd = f['command']
+    folder = f['directory']
+    os.chdir(folder)
+    r = subprocess.call(cmd + ' >& /dev/null', shell=True)
+    return r == 0
+
+def eagerly_remove_headers(f):
+    # Greedy pass: try dropping each include in turn and record it as
+    # redundant if the file still compiles without it.
+    fname = f['file']
+    candidates = get_includes(fname)
+    necessary = []
+    redundant = []
+    assert can_remove(redundant, f)
+    while len(candidates) > 0:
+        candidate = candidates.pop()
+        print 'Trying to remove %s... ' % candidate,
+        assert can_remove(redundant, f)
+        if can_remove(redundant + [candidate], f):
+            redundant.append(candidate)
+            print 'Redundant!'
+        else:
+            necessary.append(candidate)
+            print 'Required!'
+    restore_original(f)
+
+    assert can_remove(redundant, f)
+    return redundant
+
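+# The helpers below measure the size of a file's preprocessed output:
+# generate_preproc_cmd rewrites the compile command to run the preprocessor
+# only (-E, writing to stdout) and, with the default suffix, counts the
+# non-blank, non-linemarker lines. optimize_file uses these measurements to
+# judge whether dropping an #include actually shrinks what the compiler parses.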
+def generate_preproc_cmd(cmd, suffix='|grep -v -x "^#.*" |grep -v -x "^$" |wc -l'):
+    # Replace the -o argument with '-' (stdout), add -E, and append the filter.
+    parts = cmd.split()
+    i = 0
+    while i < len(parts):
+        if parts[i] == '-o':
+            break
+        i += 1
+    parts[i+1] = '-'
+    return ' '.join(parts) + ' -E ' + suffix
+
+def run_and_return_exitcode(folder, cmd):
+    os.chdir(folder)
+    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
+    outs, errs = p.communicate()
+    return p.returncode
+
+def run_and_parse_numeric_output(folder, cmd):
+    os.chdir(folder)
+    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
+    outs, errs = p.communicate()
+    if p.returncode != 0:
+        raise Exception("Compile error!")
+    return int(outs)
+
+def get_preprocessed_size(f, redundant_headers):
+    restore_original(f)
+    can_remove(redundant_headers, f)
+    preproc_cmd = generate_preproc_cmd(f['command'])
+    fname = f['file']
+    repo = get_repo_from_name(fname)
+    return run_and_parse_numeric_output(f['directory'], preproc_cmd)
+
+def include_removed(f, redundant_header):
+    # True if the header name still shows up in the preprocessed output, i.e.
+    # it is pulled in transitively even without a direct #include.
+    preproc_cmd = generate_preproc_cmd(f['command'], suffix='|grep -c "' + redundant_header + '" > /dev/null')
+    fname = f['file']
+    repo = get_repo_from_name(fname)
+    return run_and_return_exitcode(f['directory'], preproc_cmd) == 0
+
+def preprocessed_size_changed(f, redundant_headers):
+    # Compare the preprocessed size of the original file (stashed state) with
+    # the include-stripped one; require a win of more than five lines per
+    # removed #include for the change to count.
+    removed_lines = len(redundant_headers)
+    preproc_cmd = generate_preproc_cmd(f['command'])
+    fname = f['file']
+    repo = get_repo_from_name(fname)
+    os.chdir(repo)
+    p = subprocess.Popen('git stash', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    p.communicate()
+    l1 = run_and_parse_numeric_output(f['directory'], preproc_cmd)
+    os.chdir(repo)
+    p = subprocess.Popen('git stash pop', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    p.communicate()
+    l2 = run_and_parse_numeric_output(f['directory'], preproc_cmd)
+    if l1 > l2 + removed_lines*5:
+        return True
+    return False
+
+def restore_original(f):
+    fname = f['file']
+    os.chdir(get_repo_from_name(fname))
+    subprocess.call('git checkout %s >& /dev/null' % fname, shell=True)
+    return
+
+def tests_pass():
+    os.chdir(build_folder)
+    rc = subprocess.call(tests_command, shell=True)
+    return rc == 0
+
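+# optimize_file drives the whole process for a single translation unit:
+# eagerly_remove_headers finds every #include the file compiles without, a
+# refinement loop then puts back includes that are pulled in transitively
+# anyway and whose removal barely changes the preprocessed size, and the
+# surviving removals are committed, optionally only if the test suite passes.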
+def optimize_file(f):
+    redundant_headers = eagerly_remove_headers(f)
+    if not preprocessed_size_changed(f, redundant_headers):
+        restore_original(f)
+        return False
+
+    redundant = redundant_headers
+    assert can_remove(redundant, f)
+    orig_size = get_preprocessed_size(f, [])
+    optimized_size = get_preprocessed_size(f, redundant)
+    useless_to_remove = []
+    need_to_keep = 0
+    while need_to_keep < len(redundant):
+        without_current = redundant[:need_to_keep] + redundant[need_to_keep+1:]
+        restore_original(f)
+        if not can_remove(without_current, f):
+            print 'This one we\'ll remove: ', redundant[need_to_keep]
+            need_to_keep += 1
+            continue
+        current_size = get_preprocessed_size(f, without_current)
+
+        if include_removed(f, redundant[need_to_keep]) and \
+           optimized_size + (current_size - optimized_size)*10 < orig_size:
+#        if include_removed(f, redundant[need_to_keep]):
+            # Remove element (need_to_keep+1) from the removal list, i.e. keep
+            # this #include in the file.
+            print 'This one we can keep: ', redundant[need_to_keep]
+            redundant = without_current
+        else:
+            print 'This one we\'ll remove: ', redundant[need_to_keep]
+            need_to_keep += 1
+
+    optimized_size = get_preprocessed_size(f, redundant)
+    print 'Preprocessed size change: %d -> %d' % (orig_size, optimized_size)
+    restore_original(f)
+    assert can_remove(redundant, f)
+
+    if do_tests and not tests_pass():
+        restore_original(f)
+        return False
+
+    commit(f)
+    return True
+
+def main():
+    db = load_data(compile_db_file)
+    files_list = []
+    if files_list_name != '':
+        files_list = load_data(files_list_name)
+    try:
+        processed = load_data(processed_files)
+    except IOError:
+        processed = []
+
+    # First pass: count the files to process, for the progress indicator.
+    l = 0
+    for f in db:
+        basename = get_name(f['file'])
+        if len(files_list) > 0 and basename not in files_list:
+            continue
+        l += 1
+
+    i = 0
+    for f in db:
+        basename = get_name(f['file'])
+        if len(files_list) > 0 and basename not in files_list:
+            continue
+        i += 1
+        print bcolors.UNDERLINE + bcolors.BOLD + '[%d/%d] ' % (i, l) + basename + bcolors.ENDC
+        if f['file'] in processed:
+            print bcolors.OKBLUE + 'SKIP: ' + bcolors.ENDC + 'already processed!'
+            continue
+
+        try:
+            r = optimize_file(f)
+        except KeyboardInterrupt:
+            restore_original(f)
+            print bcolors.WARNING + 'Interrupted!' + bcolors.ENDC
+            save_data(processed, processed_files)
+            return
+
+        if r:
+            print bcolors.OKGREEN + 'Optimized!' + bcolors.ENDC
+        else:
+            print bcolors.OKBLUE + 'Nothing interesting:(' + bcolors.ENDC
+        processed.append(f['file'])
+        save_data(processed, processed_files)
+
+if __name__ == '__main__':
+    main()
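
Note: the script walks a Clang JSON compilation database (compile_commands.json, which
CMake can emit with -DCMAKE_EXPORT_COMPILE_COMMANDS=ON), and the repo_folders and
build_folder constants at the top are hard-coded for the author's checkout, so they
need to be adjusted locally. A minimal sketch of the kind of entry the script reads
its 'directory', 'command' and 'file' keys from; all paths here are hypothetical:

    import json

    # One hypothetical compilation-database entry in the format the script expects.
    entry = {
        "directory": "/path/to/build",          # cwd in which 'command' is executed
        "command": "clang++ -I/path/to/src/include -c /path/to/src/lib/Foo.cpp -o Foo.o",
        "file": "/path/to/src/lib/Foo.cpp",     # source file whose includes get pruned
    }

    with open("compile_commands.json", "w") as f:
        json.dump([entry], f, indent=2)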