Index: lnt/server/db/fieldchange.py
===================================================================
--- lnt/server/db/fieldchange.py
+++ lnt/server/db/fieldchange.py
@@ -92,8 +92,9 @@
for field in list(ts.Sample.get_metric_fields()):
for test_id in runinfo.test_ids:
f = None
- result = runinfo.get_comparison_result(runs, previous_runs,
- test_id, field)
+ result = runinfo.get_comparison_result(
+ runs, previous_runs, test_id, field,
+ ts.Sample.get_hash_of_binary_field())
# Try and find a matching FC and update, else create one.
try:
f = ts.query(ts.FieldChange) \
Index: lnt/server/db/regression.py
===================================================================
--- lnt/server/db/regression.py
+++ lnt/server/db/regression.py
@@ -135,7 +135,8 @@
runs_all.extend(runs.after)
ri = RunInfo(ts, [r.id for r in runs_all], only_tests=[field_change.test_id])
cr = ri.get_comparison_result(runs.after, runs.before,
- field_change.test.id, field_change.field)
+ field_change.test.id, field_change.field,
+ ts.Sample.get_hash_of_binary_field())
return cr, runs.after[0]
Index: lnt/server/db/testsuitedb.py
===================================================================
--- lnt/server/db/testsuitedb.py
+++ lnt/server/db/testsuitedb.py
@@ -343,6 +343,23 @@
if field.type.name == 'Real':
yield field
+ @staticmethod
+ def get_hash_of_binary_field():
+ """
+ get_hash_of_binary_field() -> SampleField
+
+ Get the sample field which represents a hash of the binary
+ being tested. This field will compare equal iff two binaries
+ are considered to be identical, e.g. two different compilers
+ producing identical code output.
+
+ Returns None if such a field isn't available.
+ """
+ for field in self.Sample.fields:
+ if field.name == 'hash':
+ return field
+ return None
+
# Dynamically create fields for all of the test suite defined
# sample fields.
#
Index: lnt/server/reporting/analysis.py
===================================================================
--- lnt/server/reporting/analysis.py
+++ lnt/server/reporting/analysis.py
@@ -55,6 +55,7 @@
def __init__(self, aggregation_fn,
cur_failed, prev_failed, samples, prev_samples,
+ cur_hash, prev_hash,
confidence_lv=0.05, bigger_is_better=False):
self.aggregation_fn = aggregation_fn
@@ -63,6 +64,9 @@
if aggregation_fn == stats.safe_min and bigger_is_better:
aggregation_fn = stats.safe_max
+ self.cur_hash = cur_hash
+ self.prev_hash = prev_hash
+
if samples:
self.current = aggregation_fn(samples)
else:
@@ -113,11 +117,13 @@
"""Print this ComparisonResult's constructor.
Handy for generating test cases for comparisons doing odd things."""
- fmt = "{}(" + "{}, " * 7 + ")"
+ fmt = "{}(" + "{}, " * 9 + ")"
return fmt.format(self.__class__.__name__,
self.aggregation_fn.__name__,
self.failed,
self.prev_failed,
self.samples,
self.prev_samples,
+ self.cur_hash,
+ self.prev_hash,
self.confidence_lv,
@@ -266,12 +272,14 @@
return runs, compare_runs
- def get_run_comparison_result(self, run, compare_to, test_id, field):
+ def get_run_comparison_result(self, run, compare_to, test_id, field,
+ hash_of_binary_field):
if compare_to is not None:
compare_to = [compare_to]
else:
compare_to = []
- return self.get_comparison_result([run], compare_to, test_id, field)
+ return self.get_comparison_result([run], compare_to, test_id, field,
+ hash_of_binary_field)
def get_samples(self, runs, test_id):
all_samples = []
@@ -281,7 +289,8 @@
all_samples.extend(samples)
return all_samples
- def get_comparison_result(self, runs, compare_runs, test_id, field):
+ def get_comparison_result(self, runs, compare_runs, test_id, field,
+ hash_of_binary_field):
# Get the field which indicates the requested field's status.
status_field = field.status_field
@@ -307,31 +316,53 @@
if s[field.index] is not None]
prev_values = [s[field.index] for s in prev_samples
if s[field.index] is not None]
-
+ if hash_of_binary_field:
+ hash_values = [s[hash_of_binary_field.index] for s in run_samples
+ if s[field.index] is not None]
+ prev_hash_values = [s[hash_of_binary_field.index]
+ for s in prev_samples
+ if s[field.index] is not None]
+
+ # FIXME: all hash_values and all prev_hash_values should all be the
+ # same. Could we somehow warn if this isn't the case?
+ cur_hash = hash_values[0] if len(hash_values) > 0 else None
+ prev_hash = prev_hash_values[0] \
+ if len(prev_hash_values) > 0 else None
+ else:
+ cur_hash = None
+ prev_hash = None
r = ComparisonResult(self.aggregation_fn,
run_failed, prev_failed, run_values,
- prev_values, self.confidence_lv,
+ prev_values, cur_hash, prev_hash,
+ self.confidence_lv,
bigger_is_better=field.bigger_is_better)
return r
def get_geomean_comparison_result(self, run, compare_to, field, tests):
- if tests:
- prev_values,run_values = zip(
- *[(cr.previous, cr.current) for _,_,cr in tests
- if cr.get_test_status() == UNCHANGED_PASS])
+ unchanged = [(cr.previous, cr.current, cr.prev_hash, cr.cur_hash)
+ for _, _, cr in tests
+ if cr.get_test_status() == UNCHANGED_PASS]
+ if unchanged:
+ prev_values, run_values, prev_hash, cur_hash = zip(*unchanged)
prev_values = [x for x in prev_values if x is not None]
run_values = [x for x in run_values if x is not None]
+ prev_hash = [x for x in prev_hash if x is not None]
+ cur_hash = [x for x in cur_hash if x is not None]
+ prev_hash = prev_hash[0] if len(prev_hash) > 0 else None
+ cur_hash = cur_hash[0] if len(cur_hash) > 0 else None
prev_geo = calc_geomean(prev_values)
prev_values = [prev_geo] if prev_geo else []
run_values = [calc_geomean(run_values)]
else:
- prev_values, run_values = [], []
+ prev_values, run_values, prev_hash, cur_hash = [], [], None, None
return ComparisonResult(self.aggregation_fn,
cur_failed=not bool(run_values),
prev_failed=not bool(prev_values),
samples=run_values,
prev_samples=prev_values,
+ cur_hash=cur_hash,
+ prev_hash=prev_hash,
confidence_lv=0,
bigger_is_better=field.bigger_is_better)
Index: lnt/server/reporting/dailyreport.py
===================================================================
--- lnt/server/reporting/dailyreport.py
+++ lnt/server/reporting/dailyreport.py
@@ -21,6 +21,7 @@
class DayResult:
def __init__(self, comparisonResult):
self.cr = comparisonResult
+ self.hash = self.cr.cur_hash
self.samples = self.cr.samples
if self.samples is None:
self.samples = []
@@ -70,6 +71,7 @@
day_start_offset_hours=16, for_mail=False,
filter_machine_regex=None):
self.ts = ts
+ self.hash_of_binary_field = self.ts.Sample.get_hash_of_binary_field()
self.num_prior_days_to_include = num_prior_days_to_include
self.year = year
self.month = month
@@ -314,7 +316,8 @@
prev_runs = self.machine_past_runs.get(
(machine.id, prev_day_index), ())
cr = sri.get_comparison_result(
- day_runs, prev_runs, test.id, field)
+ day_runs, prev_runs, test.id, field,
+ self.hash_of_binary_field)
# If the result is not "interesting", ignore this machine.
if not cr.is_result_interesting():
@@ -332,8 +335,9 @@
prev_day_index = find_most_recent_run_with_samples(i)
prev_runs = self.machine_past_runs.get(
(machine.id, prev_day_index), ())
- cr = sri.get_comparison_result(day_runs, prev_runs,
- test.id, field)
+ cr = sri.get_comparison_result(
+ day_runs, prev_runs, test.id, field,
+ self.hash_of_binary_field)
day_results.append(DayResult(cr))
day_results.complete()
Index: lnt/server/reporting/runs.py
===================================================================
--- lnt/server/reporting/runs.py
+++ lnt/server/reporting/runs.py
@@ -76,13 +76,15 @@
# Gather the run-over-run changes to report, organized by field and then
# collated by change type.
run_to_run_info, test_results = _get_changes_by_type(
- run, compare_to, metric_fields, test_names, num_comparison_runs, sri)
+ ts, run, compare_to, metric_fields, test_names, num_comparison_runs,
+ sri)
# If we have a baseline, gather the run-over-baseline results and
# changes.
if baseline:
run_to_baseline_info, baselined_results = _get_changes_by_type(
- run, baseline, metric_fields, test_names, num_comparison_runs, sri)
+ ts, run, baseline, metric_fields, test_names, num_comparison_runs,
+ sri)
else:
run_to_baseline_info = baselined_results = None
@@ -223,7 +225,7 @@
return subject, text_report, html_report, sri
-def _get_changes_by_type(run_a, run_b, metric_fields, test_names,
+def _get_changes_by_type(ts, run_a, run_b, metric_fields, test_names,
num_comparison_runs, sri):
comparison_results = {}
results_by_type = []
@@ -236,9 +238,11 @@
added_tests = []
existing_failures = []
unchanged_tests = []
- for name,test_id in test_names:
- cr = sri.get_run_comparison_result(run_a, run_b, test_id, field)
- comparison_results[(name,field)] = cr
+ for name, test_id in test_names:
+ cr = sri.get_run_comparison_result(
+ run_a, run_b, test_id, field,
+ ts.Sample.get_hash_of_binary_field())
+ comparison_results[(name, field)] = cr
test_status = cr.get_test_status()
perf_status = cr.get_value_status()
if test_status == lnt.server.reporting.analysis.REGRESSED:
Index: lnt/server/ui/regression_views.py
===================================================================
--- lnt/server/ui/regression_views.py
+++ lnt/server/ui/regression_views.py
@@ -329,8 +329,8 @@
runinfo = lnt.server.reporting.analysis.RunInfo(ts, runs_to_load)
- result = runinfo.get_comparison_result(runs, previous_runs,
- test_id, field)
+ result = runinfo.get_comparison_result(
+ runs, previous_runs, test_id, field, ts.Sample.get_hash_of_binary_field())
# Try and find a matching FC and update, else create one.
f = None
Index: lnt/server/ui/templates/v4_run.html
===================================================================
--- lnt/server/ui/templates/v4_run.html
+++ lnt/server/ui/templates/v4_run.html
@@ -4,6 +4,7 @@
{% set compare_to = request_info.compare_to %}
{% set baseline = request_info.baseline %}
{% set ts = request_info.ts %}
+{% set hash_field = ts.Sample.get_hash_of_binary_field() %}
{% set machine = run.machine %}
{% set neighboring_runs = request_info.neighboring_runs %}
{% set comparison_neighboring_runs = request_info.comparison_neighboring_runs %}
@@ -328,7 +329,8 @@
{% set tests = [] %}
{% set (runs, compare_runs) = request_info.sri.get_sliding_runs(run, compare_to, request_info.num_comparison_runs) %}
{% for test_name,test_id in test_info %}
- {% set cr = request_info.sri.get_comparison_result(runs, compare_runs, test_id, field) %}
+ {% set cr = request_info.sri.get_comparison_result(
+ runs, compare_runs, test_id, field, hash_field) %}
{% if cr.previous is not none or cr.current is not none %}
{% if cr.current is none or cr.current >= test_min_value_filter %}
{% if tests.append((test_name, test_id, cr)) %}{% endif %}
@@ -383,7 +385,8 @@
{{ test_name }} |
{% for field in metric_fields %}
- {% set cr = request_info.sri.get_run_comparison_result(run, compare_to, test_id, field) %}
+ {% set cr = request_info.sri.get_run_comparison_result(
+ run, compare_to, test_id, field, hash_field) %}
{{cr.previous}} |
{{cr.current}} |
{{cr.pct_delta}} |
Index: lnt/server/ui/views.py
===================================================================
--- lnt/server/ui/views.py
+++ lnt/server/ui/views.py
@@ -395,7 +395,9 @@
test = {}
test['name'] = test_name
for sample_field in ts.sample_fields:
- res = sri.get_run_comparison_result(run, None, test_id, sample_field)
+ res = sri.get_run_comparison_result(
+ run, None, test_id, sample_field,
+ ts.Sample.get_hash_of_binary_field())
test[sample_field.name] = res.current
json_obj['tests'][test_id] = test
@@ -1005,18 +1007,20 @@
# Build the test matrix. This is a two dimensional table index by
# (machine-index, test-index), where each entry is the percent change.
test_table = []
- for i,(test_id,test_name) in enumerate(reported_tests):
+ for i, (test_id, test_name) in enumerate(reported_tests):
# Create the row, starting with the test name and worst entry.
row = [(test_id, test_name), None]
# Compute comparison results for each machine.
- row.extend((runinfo.get_run_comparison_result(run, baseline, test_id,
- field), run.id)
- for baseline,run in machine_run_info)
+ row.extend((runinfo.get_run_comparison_result(
+ run, baseline, test_id, field,
+ ts.Sample.get_hash_of_binary_field()),
+ run.id)
+ for baseline, run in machine_run_info)
# Compute the worst cell value.
row[1] = max(cr.pct_delta
- for cr,_ in row[2:])
+ for cr, _ in row[2:])
test_table.append(row)
Index: tests/server/reporting/analysis.py
===================================================================
--- tests/server/reporting/analysis.py
+++ tests/server/reporting/analysis.py
@@ -114,7 +114,8 @@
"""Test a real example."""
curr_samples = [0.0887, 0.0919, 0.0903]
prev = [0.0858]
- uninteresting = ComparisonResult(min, False, False, curr_samples, prev)
+ uninteresting = ComparisonResult(
+ min, False, False, curr_samples, prev, None, None)
self.assertFalse(uninteresting.is_result_interesting())
self.assertEquals(uninteresting.get_test_status(), UNCHANGED_PASS)
@@ -122,187 +123,200 @@
def test_slower(self):
"""Test getting a simple regression."""
- slower = ComparisonResult(min, False, False, [10.], [5.])
+ slower = ComparisonResult(min, False, False, [10.], [5.], None, None)
self.assertEquals(slower.get_value_status(), REGRESSED)
self.assertTrue(slower.is_result_interesting())
def test_faster(self):
"""Test getting a simple improvement."""
- faster = ComparisonResult(min, False, False, [5.], [10.])
+ faster = ComparisonResult(min, False, False, [5.], [10.], None, None)
self.assertEquals(faster.get_value_status(), IMPROVED)
self.assertTrue(faster.is_result_interesting())
def test_really_faster(self):
"""Test getting a simple improvement."""
- faster = ComparisonResult(min, False, False, [5., 6.], [10., 10., 10.])
+ faster = ComparisonResult(
+ min, False, False, [5., 6.], [10., 10., 10.], None, None)
self.assertEquals(faster.get_value_status(), IMPROVED)
self.assertTrue(faster.is_result_interesting())
def test_improved_status(self):
"""Test getting a test status improvement."""
- improved = ComparisonResult(min, False, True, [1.], None)
+ improved = ComparisonResult(min, False, True, [1.], None, None, None)
self.assertEquals(improved.get_test_status(), IMPROVED)
def test_regressed_status(self):
"""Test getting a test status improvement."""
- improved = ComparisonResult(min, True, False, None, [10.])
+ improved = ComparisonResult(min, True, False, None, [10.], None, None)
self.assertEquals(improved.get_test_status(), REGRESSED)
def test_keep_on_failing_status(self):
"""Test getting a repeated fail."""
- improved = ComparisonResult(min, True, True, None, None)
+ improved = ComparisonResult(min, True, True, None, None, None, None)
self.assertEquals(improved.get_test_status(), UNCHANGED_FAIL)
def test_noticeable_regression(self):
"""Test a big looking jump."""
- regressed = ComparisonResult(min, False, False, [10.0, 10.1],
- [5.0, 5.1, 4.9, 5.0])
+ regressed = ComparisonResult(
+ min, False, False, [10.0, 10.1], [5.0, 5.1, 4.9, 5.0], None, None)
self.assertEquals(regressed.get_value_status(), REGRESSED)
def test_no_regression_flat_line(self):
"""This is a flat line, it should have no changes."""
- flat = ComparisonResult(min, False, False, [1.0], FLAT_LINE[0:10])
+ flat = ComparisonResult(
+ min, False, False, [1.0], FLAT_LINE[0:10], None, None)
self.assertEquals(flat.get_value_status(), UNCHANGED_PASS)
def test_no_regression_flat_line_noise(self):
"""Now 4% noise."""
- flat = ComparisonResult(min, False, False, [1.020], FLAT_NOISE[0:10])
+ flat = ComparisonResult(
+ min, False, False, [1.020], FLAT_NOISE[0:10], None, None)
ret = flat.get_value_status()
self.assertEquals(ret, UNCHANGED_PASS)
def test_big_no_regression_flat_line_noise(self):
"""Same data, but bigger 10 + 5% variation."""
- flat = ComparisonResult(min, False, False, [10.25], FLAT_NOISE2[0:10])
+ flat = ComparisonResult(
+ min, False, False, [10.25], FLAT_NOISE2[0:10], None, None)
ret = flat.get_value_status()
self.assertEquals(ret, UNCHANGED_PASS)
def test_big_no_regression_flat_line_multi(self):
"""Same data, but bigger 10 + 5% variation, multisample current."""
- flat = ComparisonResult(min, False, False, [10.0606, 10.4169, 10.1859],
- BIG_NUMBERS_FLAT[0:10])
+ flat = ComparisonResult(
+ min, False, False, [10.0606, 10.4169, 10.1859],
+ BIG_NUMBERS_FLAT[0:10], None, None)
ret = flat.get_value_status()
self.assertEquals(ret, UNCHANGED_PASS)
def test_simple_regression(self):
"""Flat line that jumps to another flat line."""
flat = ComparisonResult(
- min, False, False, [SIMPLE_REGRESSION[10]], SIMPLE_REGRESSION[0:9])
+ min, False, False, [SIMPLE_REGRESSION[10]], SIMPLE_REGRESSION[0:9],
+ None, None)
self.assertEquals(flat.get_value_status(), REGRESSED)
def test_noisy_regression_5(self):
"""A regression in 5% noise."""
- flat = ComparisonResult(min, False, False, [12.2821], REGRESS_5[0:9])
+ flat = ComparisonResult(min, False, False, [12.2821], REGRESS_5[0:9],
+ None, None)
self.assertEquals(flat.get_value_status(), REGRESSED)
def test_noisy_regression_5_multi(self):
"""A regression in 5% noise, more current samples."""
flat = ComparisonResult(min, False, False, [12.2821, 12.2141, 12.3077],
- MS_5_REG[0:9])
+ MS_5_REG[0:9], None, None)
ret = flat.get_value_status()
self.assertEquals(ret, REGRESSED)
def test_simple_improvement(self):
"""An improvement without noise."""
- flat = ComparisonResult(min, False, False, [IMP[10]], IMP[0:9])
+ flat = ComparisonResult(min, False, False, [IMP[10]], IMP[0:9], None,
+ None)
self.assertEquals(flat.get_value_status(), IMPROVED)
def test_noise_improvement(self):
"""An improvement with 5% noise."""
flat = ComparisonResult(min, False, False, [IMP_NOISE[10]],
- IMP_NOISE[0:9])
+ IMP_NOISE[0:9], None, None)
self.assertEquals(flat.get_value_status(), IMPROVED)
def test_bimodal(self):
"""A bimodal line, with no regressions."""
bimodal = ComparisonResult(min, False, False, [BIMODAL[10]],
- BIMODAL[0:9])
+ BIMODAL[0:9], None, None)
self.assertEquals(bimodal.get_value_status(), UNCHANGED_PASS)
def test_noise_bimodal(self):
"""Bimodal line with 5% noise."""
bimodal = ComparisonResult(min, False, False, [BIMODAL_NOISE[10]],
- BIMODAL_NOISE[0:9])
+ BIMODAL_NOISE[0:9], None, None)
self.assertEquals(bimodal.get_value_status(), UNCHANGED_PASS)
def test_bimodal_alternating(self):
"""Bimodal which sticks in a mode for a while."""
bimodal = ComparisonResult(min, False, False, [BM_ALTERNATE[10]],
- BM_ALTERNATE[0:9])
+ BM_ALTERNATE[0:9], None, None)
self.assertEquals(bimodal.get_value_status(), UNCHANGED_PASS)
def test_noise_bimodal_alternating(self):
"""Bimodal alternating with 5% noise."""
bimodal = ComparisonResult(min, False, False, [BM_AL_NOISE[10]],
- BM_AL_NOISE[0:9])
+ BM_AL_NOISE[0:9], None, None)
self.assertEquals(bimodal.get_value_status(), UNCHANGED_PASS)
def test_bimodal_alternating_regression(self):
"""Bimodal alternating regression."""
bimodal = ComparisonResult(min, False, False, [BM_AL_REG[11]],
- BM_AL_REG[0:10])
+ BM_AL_REG[0:10], None, None)
self.assertEquals(bimodal.get_value_status(), REGRESSED)
def test_bimodal_regression(self):
"""A regression in a bimodal line."""
bimodal = ComparisonResult(min, False, False, [BM_REGRESSION[12]],
- BM_REGRESSION[0:11])
+ BM_REGRESSION[0:11], None, None)
self.assertEquals(bimodal.get_value_status(), REGRESSED)
def test_noise_bimodal_regression(self):
bimodal = ComparisonResult(
- min, False, False, [BM_REGS_NOISE[12]], BM_REGS_NOISE[0:11])
+ min, False, False, [BM_REGS_NOISE[12]], BM_REGS_NOISE[0:11], None,
+ None)
self.assertEquals(bimodal.get_value_status(), REGRESSED)
def test_bimodal_overlapping_regression(self):
bimodal = ComparisonResult(min, False, False, [BM_REG_OVERLAP[12]],
- BM_REG_OVERLAP[0:11])
+ BM_REG_OVERLAP[0:11], None, None)
self.assertEquals(bimodal.get_value_status(), REGRESSED)
def test_noise_bimodal_overlapping_regression(self):
bimodal = ComparisonResult(
min, False, False, [BM_REG_OVER_NOISE[12]],
- BM_REG_OVER_NOISE[0:11])
+ BM_REG_OVER_NOISE[0:11], None, None)
self.assertEquals(bimodal.get_value_status(), REGRESSED)
def test_single_spike(self):
- spike = ComparisonResult(min, False, False, [SPIKE[11]], SPIKE[0:10])
+ spike = ComparisonResult(
+ min, False, False, [SPIKE[11]], SPIKE[0:10], None, None)
# Fixme
# self.assertEquals(spike.get_value_status(), UNCHANGED_PASS)
def test_noise_single_spike(self):
- spike = ComparisonResult(min, False, False,
- [NOISE_SPIKE[8]], NOISE_SPIKE[0:7])
+ spike = ComparisonResult(
+ min, False, False, [NOISE_SPIKE[8]], NOISE_SPIKE[0:7], None, None)
# Fixme
# self.assertEquals(spike.get_value_status(), UNCHANGED_PASS)
def test_slow_regression(self):
- slow = ComparisonResult(min, False, False,
- [SLOW_REG[12]], SLOW_REG[0:11])
+ slow = ComparisonResult(
+ min, False, False, [SLOW_REG[12]], SLOW_REG[0:11], None, None)
self.assertEquals(slow.get_value_status(), REGRESSED)
def test_noise_slow_regression(self):
slow = ComparisonResult(
- min, False, False, [SLOW_REG_NOISE[12]], SLOW_REG_NOISE[0:11])
+ min, False, False, [SLOW_REG_NOISE[12]], SLOW_REG_NOISE[0:11],
+ None, None)
self.assertEquals(slow.get_value_status(), REGRESSED)
def test_slow_improvement(self):
slow = ComparisonResult(
- min, False, False, [SLOW_IMP[12]], SLOW_IMP[0:11])
+ min, False, False, [SLOW_IMP[12]], SLOW_IMP[0:11], None, None)
# Fixme
# self.assertEquals(slow.get_value_status(), IMPROVED)
def test_noise_slow_improvement(self):
slow = ComparisonResult(
- min, False, False, [SLOW_IMP_NOISE[12]], SLOW_IMP_NOISE[0:11])
+ min, False, False, [SLOW_IMP_NOISE[12]], SLOW_IMP_NOISE[0:11],
+ None, None)
# Fixme
# self.assertEquals(slow.get_value_status(), IMPROVED)
def test_handle_zero_sample(self):
for agfn in (min, median):
zeroSample = ComparisonResult(
- agfn, False, False, [0.005, 0.0047, 0.0048], [0.0, 0.01, 0.01])
+ agfn, False, False, [0.005, 0.0047, 0.0048], [0.0, 0.01, 0.01],
+ None, None)
self.assertEquals(zeroSample.get_value_status(), UNCHANGED_PASS)