Index: lnt/trunk/lnt/server/db/fieldchange.py
===================================================================
--- lnt/trunk/lnt/server/db/fieldchange.py
+++ lnt/trunk/lnt/server/db/fieldchange.py
@@ -92,8 +92,9 @@
     for field in list(ts.Sample.get_metric_fields()):
         for test_id in runinfo.test_ids:
             f = None
-            result = runinfo.get_comparison_result(runs, previous_runs,
-                                                   test_id, field)
+            result = runinfo.get_comparison_result(
+                runs, previous_runs, test_id, field,
+                ts.Sample.get_hash_of_binary_field())
             # Try and find a matching FC and update, else create one.
             try:
                 f = ts.query(ts.FieldChange) \
Index: lnt/trunk/lnt/server/db/regression.py
===================================================================
--- lnt/trunk/lnt/server/db/regression.py
+++ lnt/trunk/lnt/server/db/regression.py
@@ -135,7 +135,8 @@
     runs_all.extend(runs.after)
     ri = RunInfo(ts, [r.id for r in runs_all],
                  only_tests=[field_change.test_id])
     cr = ri.get_comparison_result(runs.after, runs.before,
-                                  field_change.test.id, field_change.field)
+                                  field_change.test.id, field_change.field,
+                                  ts.Sample.get_hash_of_binary_field())
     return cr, runs.after[0], runs_all
Index: lnt/trunk/lnt/server/db/testsuitedb.py
===================================================================
--- lnt/trunk/lnt/server/db/testsuitedb.py
+++ lnt/trunk/lnt/server/db/testsuitedb.py
@@ -368,6 +368,23 @@
                     if field.type.name == 'Real':
                         yield field
 
+            @staticmethod
+            def get_hash_of_binary_field():
+                """
+                get_hash_of_binary_field() -> SampleField
+
+                Get the sample field which represents a hash of the binary
+                being tested. This field will compare equal iff two binaries
+                are considered to be identical, e.g. two different compilers
+                producing identical code output.
+
+                Returns None if such a field isn't available.
+                """
+                for field in self.Sample.fields:
+                    if field.name == 'hash':
+                        return field
+                return None
+
         # Dynamically create fields for all of the test suite defined
         # sample fields.
         #
Index: lnt/trunk/lnt/server/reporting/analysis.py
===================================================================
--- lnt/trunk/lnt/server/reporting/analysis.py
+++ lnt/trunk/lnt/server/reporting/analysis.py
@@ -2,10 +2,15 @@
 Utilities for helping with the analysis of data, for reporting purposes.
 """
+import logging
+
 from lnt.util import stats
 from lnt.server.ui import util
 from lnt.testing import FAIL
 
+LOGGER_NAME = "lnt.server.ui.app"
+logger = logging.getLogger(LOGGER_NAME)
+
 REGRESSED = 'REGRESSED'
 IMPROVED = 'IMPROVED'
 UNCHANGED_PASS = 'UNCHANGED_PASS'
@@ -55,6 +60,7 @@
 
     def __init__(self, aggregation_fn, cur_failed, prev_failed, samples,
                  prev_samples,
+                 cur_hash, prev_hash,
                  confidence_lv=0.05, bigger_is_better=False):
         self.aggregation_fn = aggregation_fn
 
@@ -63,6 +69,9 @@
         if aggregation_fn == stats.safe_min and bigger_is_better:
             aggregation_fn = stats.safe_max
 
+        self.cur_hash = cur_hash
+        self.prev_hash = prev_hash
+
         if samples:
             self.current = aggregation_fn(samples)
         else:
@@ -113,11 +122,13 @@
         """Print this ComparisonResult's constructor.
 
         Handy for generating test cases for comparisons doing odd things."""
-        fmt = "{}(" + "{}, " * 7 + ")"
+        fmt = "{}(" + "{}, " * 9 + ")"
         return fmt.format(self.__class__.__name__,
                           self.aggregation_fn.__name__,
                           self.failed,
                           self.prev_failed,
+                          self.cur_hash,
+                          self.prev_hash,
                           self.samples,
                           self.prev_samples,
                           self.confidence_lv,
@@ -271,12 +282,14 @@
 
         return runs, compare_runs
 
-    def get_run_comparison_result(self, run, compare_to, test_id, field):
+    def get_run_comparison_result(self, run, compare_to, test_id, field,
+                                  hash_of_binary_field):
         if compare_to is not None:
            compare_to = [compare_to]
         else:
             compare_to = []
-        return self.get_comparison_result([run], compare_to, test_id, field)
+        return self.get_comparison_result([run], compare_to, test_id, field,
+                                          hash_of_binary_field)
 
     def get_samples(self, runs, test_id):
         all_samples = []
@@ -286,7 +299,8 @@
                 all_samples.extend(samples)
         return all_samples
 
-    def get_comparison_result(self, runs, compare_runs, test_id, field):
+    def get_comparison_result(self, runs, compare_runs, test_id, field,
+                              hash_of_binary_field):
         # Get the field which indicates the requested field's status.
         status_field = field.status_field
 
@@ -312,31 +326,63 @@
                       if s[field.index] is not None]
         prev_values = [s[field.index] for s in prev_samples
                        if s[field.index] is not None]
-
+        if hash_of_binary_field:
+            hash_values = [s[hash_of_binary_field.index] for s in run_samples
+                           if s[field.index] is not None]
+            prev_hash_values = [s[hash_of_binary_field.index]
+                                for s in prev_samples
+                                if s[field.index] is not None]
+
+            # All hash_values should be identical, and likewise all
+            # prev_hash_values; warn in the log when a run's samples disagree.
+            cur_hash_set = set(hash_values)
+            prev_hash_set = set(prev_hash_values)
+            if len(cur_hash_set) > 1:
+                logger.warning("Found different hashes for multiple samples " +
+                               "in the same run %r: %r",
+                               runs, hash_values)
+            if len(prev_hash_set) > 1:
+                logger.warning("Found different hashes for multiple samples " +
+                               "in the same run %r: %r",
+                               compare_runs, prev_hash_values)
+            cur_hash = hash_values[0] if len(hash_values) > 0 else None
+            prev_hash = prev_hash_values[0] \
+                if len(prev_hash_values) > 0 else None
+        else:
+            cur_hash = None
+            prev_hash = None
         r = ComparisonResult(self.aggregation_fn,
                              run_failed, prev_failed, run_values,
-                             prev_values, self.confidence_lv,
+                             prev_values, cur_hash, prev_hash,
+                             self.confidence_lv,
                              bigger_is_better=field.bigger_is_better)
         return r
 
     def get_geomean_comparison_result(self, run, compare_to, field, tests):
         if tests:
-            prev_values,run_values = zip(
-                *[(cr.previous, cr.current) for _,_,cr in tests
+            prev_values, run_values, prev_hash, cur_hash = zip(
+                *[(cr.previous, cr.current, cr.prev_hash, cr.cur_hash)
+                  for _, _, cr in tests
                   if cr.get_test_status() == UNCHANGED_PASS])
             prev_values = [x for x in prev_values if x is not None]
             run_values = [x for x in run_values if x is not None]
+            prev_hash = [x for x in prev_hash if x is not None]
+            cur_hash = [x for x in cur_hash if x is not None]
+            prev_hash = prev_hash[0] if len(prev_hash) > 0 else None
+            cur_hash = cur_hash[0] if len(cur_hash) > 0 else None
             prev_geo = calc_geomean(prev_values)
            prev_values = [prev_geo] if prev_geo else []
             run_values = [calc_geomean(run_values)]
         else:
-            prev_values, run_values = [], []
+            prev_values, run_values, prev_hash, cur_hash = [], [], None, None
         return ComparisonResult(self.aggregation_fn,
                                 cur_failed=not bool(run_values),
                                 prev_failed=not bool(prev_values),
                                 samples=run_values,
                                 prev_samples=prev_values,
+                                cur_hash=cur_hash,
+                                prev_hash=prev_hash,
                                 confidence_lv=0,
                                 bigger_is_better=field.bigger_is_better)
Index: lnt/trunk/lnt/server/reporting/dailyreport.py
===================================================================
--- lnt/trunk/lnt/server/reporting/dailyreport.py
+++ lnt/trunk/lnt/server/reporting/dailyreport.py
@@ -21,6 +21,7 @@
 class DayResult:
     def __init__(self, comparisonResult):
         self.cr = comparisonResult
+        self.hash = self.cr.cur_hash
         self.samples = self.cr.samples
         if self.samples is None:
             self.samples = []
@@ -70,6 +71,7 @@
                  day_start_offset_hours=16, for_mail=False,
                  filter_machine_regex=None):
         self.ts = ts
+        self.hash_of_binary_field = self.ts.Sample.get_hash_of_binary_field()
         self.num_prior_days_to_include = num_prior_days_to_include
         self.year = year
         self.month = month
@@ -314,7 +316,8 @@
                     prev_runs = self.machine_past_runs.get(
                         (machine.id, prev_day_index), ())
                     cr = sri.get_comparison_result(
-                        day_runs, prev_runs, test.id, field)
+                        day_runs, prev_runs, test.id, field,
+                        self.hash_of_binary_field)
 
                     # If the result is not "interesting", ignore this machine.
                     if not cr.is_result_interesting():
@@ -332,8 +335,9 @@
                         prev_day_index = find_most_recent_run_with_samples(i)
                         prev_runs = self.machine_past_runs.get(
                             (machine.id, prev_day_index), ())
-                        cr = sri.get_comparison_result(day_runs, prev_runs,
-                                                       test.id, field)
+                        cr = sri.get_comparison_result(
+                            day_runs, prev_runs, test.id, field,
+                            self.hash_of_binary_field)
                         day_results.append(DayResult(cr))
 
                     day_results.complete()
Index: lnt/trunk/lnt/server/reporting/runs.py
===================================================================
--- lnt/trunk/lnt/server/reporting/runs.py
+++ lnt/trunk/lnt/server/reporting/runs.py
@@ -76,13 +76,15 @@
     # Gather the run-over-run changes to report, organized by field and then
     # collated by change type.
     run_to_run_info, test_results = _get_changes_by_type(
-        run, compare_to, metric_fields, test_names, num_comparison_runs, sri)
+        ts, run, compare_to, metric_fields, test_names, num_comparison_runs,
+        sri)
 
     # If we have a baseline, gather the run-over-baseline results and
     # changes.
     if baseline:
         run_to_baseline_info, baselined_results = _get_changes_by_type(
-            run, baseline, metric_fields, test_names, num_comparison_runs, sri)
+            ts, run, baseline, metric_fields, test_names, num_comparison_runs,
+            sri)
     else:
         run_to_baseline_info = baselined_results = None
 
@@ -223,7 +225,7 @@
     return subject, text_report, html_report, sri
 
 
-def _get_changes_by_type(run_a, run_b, metric_fields, test_names,
+def _get_changes_by_type(ts, run_a, run_b, metric_fields, test_names,
                          num_comparison_runs, sri):
     comparison_results = {}
     results_by_type = []
@@ -236,9 +238,11 @@
         added_tests = []
         existing_failures = []
         unchanged_tests = []
-        for name,test_id in test_names:
-            cr = sri.get_run_comparison_result(run_a, run_b, test_id, field)
-            comparison_results[(name,field)] = cr
+        for name, test_id in test_names:
+            cr = sri.get_run_comparison_result(
+                run_a, run_b, test_id, field,
+                ts.Sample.get_hash_of_binary_field())
+            comparison_results[(name, field)] = cr
             test_status = cr.get_test_status()
             perf_status = cr.get_value_status()
             if test_status == lnt.server.reporting.analysis.REGRESSED:
Index: lnt/trunk/lnt/server/ui/regression_views.py
===================================================================
--- lnt/trunk/lnt/server/ui/regression_views.py
+++ lnt/trunk/lnt/server/ui/regression_views.py
@@ -376,8 +376,8 @@
 
     runinfo = lnt.server.reporting.analysis.RunInfo(ts, runs_to_load)
 
-    result = runinfo.get_comparison_result(runs, previous_runs,
-                                           test_id, field)
+    result = runinfo.get_comparison_result(
+        runs, previous_runs, test_id, field, ts.Sample.get_hash_of_binary_field())
 
     # Try and find a matching FC and update, else create one.
     f = None
Index: lnt/trunk/lnt/server/ui/templates/v4_run.html
===================================================================
--- lnt/trunk/lnt/server/ui/templates/v4_run.html
+++ lnt/trunk/lnt/server/ui/templates/v4_run.html
@@ -4,6 +4,7 @@
 {% set compare_to = request_info.compare_to %}
 {% set baseline = request_info.baseline %}
 {% set ts = request_info.ts %}
+{% set hash_field = ts.Sample.get_hash_of_binary_field() %}
 {% set machine = run.machine %}
 {% set neighboring_runs = request_info.neighboring_runs %}
 {% set comparison_neighboring_runs = request_info.comparison_neighboring_runs %}
@@ -328,7 +329,8 @@
 {% set tests = [] %}
 {% set (runs, compare_runs) = request_info.sri.get_sliding_runs(run, compare_to, request_info.num_comparison_runs) %}
 {% for test_name,test_id in test_info %}
-  {% set cr = request_info.sri.get_comparison_result(runs, compare_runs, test_id, field) %}
+  {% set cr = request_info.sri.get_comparison_result(
+         runs, compare_runs, test_id, field, hash_field) %}
   {% if cr.previous is not none or cr.current is not none %}
     {% if cr.current is none or cr.current >= test_min_value_filter %}
       {% if tests.append((test_name, test_id, cr)) %}{% endif %}
@@ -383,7 +385,8 @@
 {{ test_name }}
 {% for field in metric_fields %}
-  {% set cr = request_info.sri.get_run_comparison_result(run, compare_to, test_id, field) %}
+  {% set cr = request_info.sri.get_run_comparison_result(
+         run, compare_to, test_id, field, hash_field) %}
   {{cr.previous}}
   {{cr.current}}
   {{cr.pct_delta}}
Index: lnt/trunk/lnt/server/ui/views.py
===================================================================
--- lnt/trunk/lnt/server/ui/views.py
+++ lnt/trunk/lnt/server/ui/views.py
@@ -395,7 +395,9 @@
         test = {}
         test['name'] = test_name
         for sample_field in ts.sample_fields:
-            res = sri.get_run_comparison_result(run, None, test_id, sample_field)
+            res = sri.get_run_comparison_result(
+                run, None, test_id, sample_field,
+                ts.Sample.get_hash_of_binary_field())
             test[sample_field.name] = res.current
         json_obj['tests'][test_id] = test
 
@@ -1005,18 +1007,20 @@
     # Build the test matrix. This is a two dimensional table index by
     # (machine-index, test-index), where each entry is the percent change.
     test_table = []
-    for i,(test_id,test_name) in enumerate(reported_tests):
+    for i, (test_id, test_name) in enumerate(reported_tests):
         # Create the row, starting with the test name and worst entry.
         row = [(test_id, test_name), None]
 
         # Compute comparison results for each machine.
-        row.extend((runinfo.get_run_comparison_result(run, baseline, test_id,
-                                                      field), run.id)
-                   for baseline,run in machine_run_info)
+        row.extend((runinfo.get_run_comparison_result(
+                        run, baseline, test_id, field,
+                        ts.Sample.get_hash_of_binary_field()),
+                    run.id)
+                   for baseline, run in machine_run_info)
 
         # Compute the worst cell value.
         row[1] = max(cr.pct_delta
-                     for cr,_ in row[2:])
+                     for cr, _ in row[2:])
 
         test_table.append(row)
Index: lnt/trunk/tests/server/reporting/analysis.py
===================================================================
--- lnt/trunk/tests/server/reporting/analysis.py
+++ lnt/trunk/tests/server/reporting/analysis.py
@@ -114,7 +114,8 @@
         """Test a real example."""
         curr_samples = [0.0887, 0.0919, 0.0903]
         prev = [0.0858]
-        uninteresting = ComparisonResult(min, False, False, curr_samples, prev)
+        uninteresting = ComparisonResult(
+            min, False, False, curr_samples, prev, None, None)
         self.assertFalse(uninteresting.is_result_interesting())
         self.assertEquals(uninteresting.get_test_status(), UNCHANGED_PASS)
 
@@ -122,187 +123,200 @@
     def test_slower(self):
         """Test getting a simple regression."""
-        slower = ComparisonResult(min, False, False, [10.], [5.])
+        slower = ComparisonResult(min, False, False, [10.], [5.], None, None)
         self.assertEquals(slower.get_value_status(), REGRESSED)
         self.assertTrue(slower.is_result_interesting())
 
     def test_faster(self):
         """Test getting a simple improvement."""
-        faster = ComparisonResult(min, False, False, [5.], [10.])
+        faster = ComparisonResult(min, False, False, [5.], [10.], None, None)
         self.assertEquals(faster.get_value_status(), IMPROVED)
         self.assertTrue(faster.is_result_interesting())
 
     def test_really_faster(self):
         """Test getting a simple improvement."""
-        faster = ComparisonResult(min, False, False, [5., 6.], [10., 10., 10.])
+        faster = ComparisonResult(
+            min, False, False, [5., 6.], [10., 10., 10.], None, None)
         self.assertEquals(faster.get_value_status(), IMPROVED)
         self.assertTrue(faster.is_result_interesting())
 
     def test_improved_status(self):
         """Test getting a test status improvement."""
-        improved = ComparisonResult(min, False, True, [1.], None)
+        improved = ComparisonResult(min, False, True, [1.], None, None, None)
         self.assertEquals(improved.get_test_status(), IMPROVED)
 
     def test_regressed_status(self):
         """Test getting a test status improvement."""
-        improved = ComparisonResult(min, True, False, None, [10.])
+        improved = ComparisonResult(min, True, False, None, [10.], None, None)
         self.assertEquals(improved.get_test_status(), REGRESSED)
 
     def test_keep_on_failing_status(self):
         """Test getting a repeated fail."""
-        improved = ComparisonResult(min, True, True, None, None)
+        improved = ComparisonResult(min, True, True, None, None, None, None)
         self.assertEquals(improved.get_test_status(), UNCHANGED_FAIL)
 
     def test_noticeable_regression(self):
         """Test a big looking jump."""
-        regressed = ComparisonResult(min, False, False, [10.0, 10.1],
-                                     [5.0, 5.1, 4.9, 5.0])
+        regressed = ComparisonResult(
+            min, False, False, [10.0, 10.1], [5.0, 5.1, 4.9, 5.0], None, None)
         self.assertEquals(regressed.get_value_status(), REGRESSED)
 
     def test_no_regression_flat_line(self):
         """This is a flat line, it should have no changes."""
-        flat = ComparisonResult(min, False, False, [1.0], FLAT_LINE[0:10])
+        flat = ComparisonResult(
+            min, False, False, [1.0], FLAT_LINE[0:10], None, None)
         self.assertEquals(flat.get_value_status(), UNCHANGED_PASS)
 
     def test_no_regression_flat_line_noise(self):
         """Now 4% noise."""
-        flat = ComparisonResult(min, False, False, [1.020], FLAT_NOISE[0:10])
+        flat = ComparisonResult(
+            min, False, False, [1.020], FLAT_NOISE[0:10], None, None)
         ret = flat.get_value_status()
         self.assertEquals(ret, UNCHANGED_PASS)
 
     def test_big_no_regression_flat_line_noise(self):
         """Same data, but bigger 10 + 5% variation."""
-        flat = ComparisonResult(min, False, False, [10.25], FLAT_NOISE2[0:10])
+        flat = ComparisonResult(
+            min, False, False, [10.25], FLAT_NOISE2[0:10], None, None)
         ret = flat.get_value_status()
         self.assertEquals(ret, UNCHANGED_PASS)
 
     def test_big_no_regression_flat_line_multi(self):
         """Same data, but bigger 10 + 5% variation, multisample current."""
-        flat = ComparisonResult(min, False, False, [10.0606, 10.4169, 10.1859],
-                                BIG_NUMBERS_FLAT[0:10])
+        flat = ComparisonResult(
+            min, False, False, [10.0606, 10.4169, 10.1859],
+            BIG_NUMBERS_FLAT[0:10], None, None)
         ret = flat.get_value_status()
         self.assertEquals(ret, UNCHANGED_PASS)
 
     def test_simple_regression(self):
         """Flat line that jumps to another flat line."""
         flat = ComparisonResult(
-            min, False, False, [SIMPLE_REGRESSION[10]], SIMPLE_REGRESSION[0:9])
+            min, False, False, [SIMPLE_REGRESSION[10]], SIMPLE_REGRESSION[0:9],
+            None, None)
         self.assertEquals(flat.get_value_status(), REGRESSED)
 
     def test_noisy_regression_5(self):
         """A regression in 5% noise."""
-        flat = ComparisonResult(min, False, False, [12.2821], REGRESS_5[0:9])
+        flat = ComparisonResult(min, False, False, [12.2821], REGRESS_5[0:9],
+                                None, None)
         self.assertEquals(flat.get_value_status(), REGRESSED)
 
     def test_noisy_regression_5_multi(self):
         """A regression in 5% noise, more current samples."""
         flat = ComparisonResult(min, False, False, [12.2821, 12.2141, 12.3077],
-                                MS_5_REG[0:9])
+                                MS_5_REG[0:9], None, None)
         ret = flat.get_value_status()
         self.assertEquals(ret, REGRESSED)
 
     def test_simple_improvement(self):
         """An improvement without noise."""
-        flat = ComparisonResult(min, False, False, [IMP[10]], IMP[0:9])
+        flat = ComparisonResult(min, False, False, [IMP[10]], IMP[0:9], None,
+                                None)
        self.assertEquals(flat.get_value_status(), IMPROVED)
 
     def test_noise_improvement(self):
         """An improvement with 5% noise."""
         flat = ComparisonResult(min, False, False, [IMP_NOISE[10]],
-                                IMP_NOISE[0:9])
+                                IMP_NOISE[0:9], None, None)
         self.assertEquals(flat.get_value_status(), IMPROVED)
 
     def test_bimodal(self):
         """A bimodal line, with no regressions."""
         bimodal = ComparisonResult(min, False, False, [BIMODAL[10]],
-                                   BIMODAL[0:9])
+                                   BIMODAL[0:9], None, None)
         self.assertEquals(bimodal.get_value_status(), UNCHANGED_PASS)
 
     def test_noise_bimodal(self):
         """Bimodal line with 5% noise."""
         bimodal = ComparisonResult(min, False, False, [BIMODAL_NOISE[10]],
-                                   BIMODAL_NOISE[0:9])
+                                   BIMODAL_NOISE[0:9], None, None)
         self.assertEquals(bimodal.get_value_status(), UNCHANGED_PASS)
 
     def test_bimodal_alternating(self):
         """Bimodal which sticks in a mode for a while."""
         bimodal = ComparisonResult(min, False, False, [BM_ALTERNATE[10]],
-                                   BM_ALTERNATE[0:9])
+                                   BM_ALTERNATE[0:9], None, None)
         self.assertEquals(bimodal.get_value_status(), UNCHANGED_PASS)
 
     def test_noise_bimodal_alternating(self):
         """Bimodal alternating with 5% noise."""
         bimodal = ComparisonResult(min, False, False, [BM_AL_NOISE[10]],
-                                   BM_AL_NOISE[0:9])
+                                   BM_AL_NOISE[0:9], None, None)
         self.assertEquals(bimodal.get_value_status(), UNCHANGED_PASS)
 
     def test_bimodal_alternating_regression(self):
         """Bimodal alternating regression."""
         bimodal = ComparisonResult(min, False, False, [BM_AL_REG[11]],
-                                   BM_AL_REG[0:10])
+                                   BM_AL_REG[0:10], None, None)
         self.assertEquals(bimodal.get_value_status(), REGRESSED)
 
     def test_bimodal_regression(self):
         """A regression in a bimodal line."""
         bimodal = ComparisonResult(min, False, False, [BM_REGRESSION[12]],
-                                   BM_REGRESSION[0:11])
+                                   BM_REGRESSION[0:11], None, None)
         self.assertEquals(bimodal.get_value_status(), REGRESSED)
 
     def test_noise_bimodal_regression(self):
         bimodal = ComparisonResult(
-            min, False, False, [BM_REGS_NOISE[12]], BM_REGS_NOISE[0:11])
+            min, False, False, [BM_REGS_NOISE[12]], BM_REGS_NOISE[0:11], None,
+            None)
         self.assertEquals(bimodal.get_value_status(), REGRESSED)
 
     def test_bimodal_overlapping_regression(self):
         bimodal = ComparisonResult(min, False, False, [BM_REG_OVERLAP[12]],
-                                   BM_REG_OVERLAP[0:11])
+                                   BM_REG_OVERLAP[0:11], None, None)
         self.assertEquals(bimodal.get_value_status(), REGRESSED)
 
     def test_noise_bimodal_overlapping_regression(self):
         bimodal = ComparisonResult(
             min, False, False, [BM_REG_OVER_NOISE[12]],
-            BM_REG_OVER_NOISE[0:11])
+            BM_REG_OVER_NOISE[0:11], None, None)
         self.assertEquals(bimodal.get_value_status(), REGRESSED)
 
     def test_single_spike(self):
-        spike = ComparisonResult(min, False, False, [SPIKE[11]], SPIKE[0:10])
+        spike = ComparisonResult(
+            min, False, False, [SPIKE[11]], SPIKE[0:10], None, None)
         # Fixme
         # self.assertEquals(spike.get_value_status(), UNCHANGED_PASS)
 
     def test_noise_single_spike(self):
-        spike = ComparisonResult(min, False, False,
-                                 [NOISE_SPIKE[8]], NOISE_SPIKE[0:7])
+        spike = ComparisonResult(
+            min, False, False, [NOISE_SPIKE[8]], NOISE_SPIKE[0:7], None, None)
         # Fixme
         # self.assertEquals(spike.get_value_status(), UNCHANGED_PASS)
 
     def test_slow_regression(self):
-        slow = ComparisonResult(min, False, False,
-                                [SLOW_REG[12]], SLOW_REG[0:11])
+        slow = ComparisonResult(
+            min, False, False, [SLOW_REG[12]], SLOW_REG[0:11], None, None)
         self.assertEquals(slow.get_value_status(), REGRESSED)
 
     def test_noise_slow_regression(self):
         slow = ComparisonResult(
-            min, False, False, [SLOW_REG_NOISE[12]], SLOW_REG_NOISE[0:11])
+            min, False, False, [SLOW_REG_NOISE[12]], SLOW_REG_NOISE[0:11],
+            None, None)
         self.assertEquals(slow.get_value_status(), REGRESSED)
 
     def test_slow_improvement(self):
         slow = ComparisonResult(
-            min, False, False, [SLOW_IMP[12]], SLOW_IMP[0:11])
+            min, False, False, [SLOW_IMP[12]], SLOW_IMP[0:11], None, None)
         # Fixme
         # self.assertEquals(slow.get_value_status(), IMPROVED)
 
     def test_noise_slow_improvement(self):
         slow = ComparisonResult(
-            min, False, False, [SLOW_IMP_NOISE[12]], SLOW_IMP_NOISE[0:11])
+            min, False, False, [SLOW_IMP_NOISE[12]], SLOW_IMP_NOISE[0:11],
+            None, None)
         # Fixme
         # self.assertEquals(slow.get_value_status(), IMPROVED)
 
     def test_handle_zero_sample(self):
         for agfn in (min, median):
             zeroSample = ComparisonResult(
-                agfn, False, False, [0.005, 0.0047, 0.0048], [0.0, 0.01, 0.01])
+                agfn, False, False, [0.005, 0.0047, 0.0048], [0.0, 0.01, 0.01],
+                None, None)
             self.assertEquals(zeroSample.get_value_status(), UNCHANGED_PASS)
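
For reference, a minimal usage sketch of the new API (not part of the patch itself): it shows how a caller obtains the binary-hash field from the test suite, threads it through get_comparison_result(), and reads the hashes back off the ComparisonResult. The variables ts, runinfo, runs, previous_runs, test_id and field are assumed to be set up the same way as in the call sites patched above.

    # Hypothetical caller, mirroring the call sites changed in this patch.
    hash_field = ts.Sample.get_hash_of_binary_field()  # None if the suite has no 'hash' field
    cr = runinfo.get_comparison_result(runs, previous_runs, test_id, field,
                                       hash_field)
    if cr.cur_hash is not None and cr.cur_hash == cr.prev_hash:
        # Per get_hash_of_binary_field()'s docstring, equal hashes mean both
        # runs were produced from an identical binary.
        pass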