Index: utils/compare.py =================================================================== --- utils/compare.py +++ utils/compare.py @@ -12,6 +12,7 @@ import re import numbers import argparse +import math def read_lit_json(filename): import json @@ -207,6 +208,12 @@ else: return "%-5d" % value +def strip_name_fully(name): + name = name.split('/')[-1] + if name.endswith('.test'): + name = name[:-5] + return name + def print_result(d, limit_output=True, shorten_names=True, minimal_names=False, show_diff_column=True, sortkey='diff', sort_by_abs=True): # sort (TODO: is there a more elegant way than create+drop a column?) @@ -240,13 +247,6 @@ name = name[:-common_suffix] return "%-45s" % truncate(name, 10, 30) - - def strip_name_fully(name): - name = name.split('/')[-1] - if name.endswith('.test'): - name = name[:-5] - return name - if minimal_names: formatters['Program'] = strip_name_fully else: @@ -262,6 +262,162 @@ print(out) print(d.describe()) +def strip_extension_from_file_name(file_name): + # drop .json/.csv suffix; TODO: Should we rather do this in the printing + # logic? + for ext in ['.csv', '.json']: + if file_name.endswith(ext): + file_name = file_name[:-len(ext)] + return file_name + +def draw_plots(data, files, output_path, metrics): + try: + import matplotlib.pyplot as plt + except ImportError: + sys.stderr.write('matplotlib.pyplot not found.') + sys.exit(1) + try: + from matplotlib.cm import get_cmap + except ImportError: + sys.stderr.write('get_cmap from matplotlib.cm not found.') + sys.exit(1) + + # Prepare the mean values + d = data.mean() + del d['diff'] + + file_name_a = strip_extension_from_file_name(files[0]) + file_name_b = strip_extension_from_file_name(files[1]) + + mean_idx, test_idx = 0, 1 + fig, axs = plt.subplots(2, len(metrics), squeeze=False) + fig.suptitle( + 'Mean values and top 10 most differing test results for the given metric(s)\n{} vs. {}'.format(file_name_a, file_name_b), + fontsize=5, + fontweight='bold', + color='black' + ) + + cmap = get_cmap("tab10") + colors = cmap.colors + + for i in range(len(metrics)): + bars = [] + axs[mean_idx][i].set_title( + 'Metric: {}'.format(metrics[i]), + fontsize=5, + fontweight='bold', + color='black' + ) + axs[mean_idx][i].set_xticks([]) + axs[mean_idx][i].set_xticklabels([]) + axs[mean_idx][i].spines['top'].set_visible(False) + axs[mean_idx][i].spines['right'].set_visible(False) + for j in range(len(files)): + file_name = strip_extension_from_file_name(files[j]) + bar = axs[mean_idx][i].bar( + j * 5, + d["{}:{}".format(metrics[i], file_name)], + width=0.5, + color=colors[j] + ) + bars.append(bar) + axs[mean_idx][i].text( + j * 5, + d["{}:{}".format(metrics[i], file_name)] * 1.025, + s=str(round(d["{}:{}".format(metrics[i], file_name)], 3)), + color=colors[j], + fontstyle='italic', + fontweight='bold', + fontsize=10, + horizontalalignment='center' + ) + axs[mean_idx][i].legend(bars, files, prop={'size': 5}, loc='lower center') + + # Prepare test results + del data['diff'] + + for i in range(len(metrics)): + test_result_diffs = {} + data_key_a = "{}:{}".format(metrics[i], file_name_a) + data_key_b = "{}:{}".format(metrics[i], file_name_b) + for test_name in data[data_key_a].keys(): + if test_name not in data[data_key_b].keys(): + continue + value_a = data[data_key_a][test_name] + value_b = data[data_key_b][test_name] + if value_a == 0 or value_b == 0: + continue + test_result_diff = max(value_a, value_b) / min(value_a, value_b) - 1.0 + if (value_b > value_a): + test_result_diff *= -1 + if math.isnan(test_result_diff) or math.isinf(test_result_diff): + continue + test_result_diffs[test_name] = test_result_diff + sorted_test_result_diffs = dict(sorted(test_result_diffs.items(), key=lambda item: abs(item[1]), reverse=True)) + size = min(len(sorted_test_result_diffs), 10) + top_test_result_diffs = list(sorted_test_result_diffs.items())[:size] + fontdict = { + 'fontsize': 5, + 'fontstyle': 'italic', + 'fontweight': 'bold', + 'horizontalalignment': 'center' + } + axs[test_idx][i].set_title( + 'Metric: {}'.format(metrics[i]), + fontsize=5, + fontweight='bold', + color='black' + ) + axs[test_idx][i].set_xlabel('Test name', fontdict=fontdict) + axs[test_idx][i].set_ylabel('Increase/Decrease (in %)', fontdict=fontdict) + axs[test_idx][i].set_xticks([0.5 + x for x in range(0, size)]) + test_names = [strip_name_fully(test_result_info[0]) for test_result_info in top_test_result_diffs] + fontdict['fontsize'] = 3.775 + axs[test_idx][i].set_xticklabels(test_names, fontdict=fontdict, rotation=45) + axs[test_idx][i].spines['top'].set_visible(False) + axs[test_idx][i].spines['right'].set_visible(False) + test_results = [test_result_info[1] for test_result_info in top_test_result_diffs] + axs[test_idx][i].bar( + list(range(0, 10)), + height=test_results, + edgecolor='black', + linewidth=0.25, + color='green', + width=1, + align='edge' + ) + for rect, label in zip(axs[test_idx][i].patches, ["{}{}%".format("+" if test_result > 0 else "", round(test_result, 2)) for test_result in test_results]): + height = rect.get_height() + axs[test_idx][i].text( + rect.get_x() + rect.get_width() / 2, + height * 1.025, + label, + fontstyle='italic', + fontweight='bold', + fontsize=5, + horizontalalignment='center', + verticalalignment=('bottom' if (height > 0) else 'top') + ) + + plt.tight_layout() + + if output_path == '-': + plt.savefig('plot.png', dpi=500) + print('The plot was saved at: plot.png.') + else: + plt.savefig( + os.path.normpath( + os.path.join(output_path, 'plot.png') + ), + dpi=500 + ) + print('The plot was saved at: {}.'.format( + os.path.normpath( + os.path.join(output_path, 'plot.png') + ) + )) + if __name__ == "__main__": parser = argparse.ArgumentParser(prog='compare.py') parser.add_argument('-a', '--all', action='store_true') @@ -295,8 +451,17 @@ dest='minimal_names', default=False) parser.add_argument('--no-abs-sort', action='store_true', dest='no_abs_sort', default=False, help="Don't use abs() when sorting results") + parser.add_argument('-plot', action='store_true', dest='plot', default=False) + parser.add_argument('--plot-path', nargs=1, type=str, action='store', dest='plot_path', metavar='[=]', default='-') config = parser.parse_args() + if (config.plot_path != '-' and not config.plot): + sys.stderr.write('Please specify --plot-path only with the -plot option.') + sys.exit(1) + if (config.plot_path != '-' and not os.path.isdir(config.plot_path)): + sys.stderr.write('Provided --plot-path is not a valid directory!') + sys.exit(1) + if config.show_diff is None: config.show_diff = len(config.files) > 1 @@ -316,6 +481,7 @@ # Combine to new dataframe data = pd.concat([lhs_merged, rhs_merged], names=['l/r'], keys=[config.lhs_name, config.rhs_name]) + files = [config.lhs_name, config.rhs_name] else: data = readmulti(files) @@ -377,7 +543,7 @@ data = data.unstack(level=0) # unstack() gave us a complicated multiindex for the columns, simplify # things by renaming to a simple index. - data.columns = [(c[1] if c[1] else c[0]) for c in data.columns.values] + data.columns = ["{}:{}".format(c[0], c[1]) for c in data.columns.values] data = add_diff_column(data) @@ -390,3 +556,6 @@ shorten_names = not config.full limit_output = (not config.all) and (not config.full) print_result(data, limit_output, shorten_names, config.minimal_names, config.show_diff, sortkey, config.no_abs_sort) + + if (config.plot): + draw_plots(data, files, config.plot_path, metrics)