Skip to content

Commit

Permalink
Add extra_column to benchcomp markdown visualizer (#2415)
Browse files Browse the repository at this point in the history
  • Loading branch information
karkhaz authored Apr 28, 2023
1 parent d045764 commit 47afab5
Show file tree
Hide file tree
Showing 3 changed files with 181 additions and 18 deletions.
105 changes: 97 additions & 8 deletions tools/benchcomp/benchcomp/visualizers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,84 @@ def __call__(self, results):


class dump_markdown_results_table:
"""Print a Markdown-formatted table displaying benchmark results
"""Print Markdown-formatted tables displaying benchmark results
For each metric, this visualization prints out a table of benchmarks,
showing the value of the metric for each variant.
The 'out_file' key is mandatory; specify '-' to print to stdout.
'extra_columns' can be an empty dict. The sample configuration below assumes
that each benchmark result has a 'success' and 'runtime' metric for both
variants, 'variant_1' and 'variant_2'. It adds a 'ratio' column to the table
for the 'runtime' metric, and a 'change' column to the table for the
'success' metric. The 'text' lambda is called once for each benchmark. The
'text' lambda accepts a single argument---a dict---that maps variant
names to the value of that variant for a particular metric. The lambda
returns a string that is rendered in the benchmark's row in the new column.
This allows you to emit arbitrary text or markdown formatting in response to
particular combinations of values for different variants, such as
regressions or performance improvements.
Sample configuration:
```
visualize:
- type: dump_markdown_results_table
out_file: '-'
out_file: "-"
extra_columns:
runtime:
- column_name: ratio
text: >
lambda b: str(b["variant_2"]/b["variant_1"])
if b["variant_2"] < (1.5 * b["variant_1"])
else "**" + str(b["variant_2"]/b["variant_1"]) + "**"
success:
- column_name: change
text: >
lambda b: "" if b["variant_2"] == b["variant_1"]
else "newly passing" if b["variant_2"]
else "regressed"
```
Example output:
```
## runtime
| Benchmark | variant_1 | variant_2 | ratio |
| --- | --- | --- | --- |
| bench_1 | 5 | 10 | **2.0** |
| bench_2 | 10 | 5 | 0.5 |
## success
| Benchmark | variant_1 | variant_2 | change |
| --- | --- | --- | --- |
| bench_1 | True | True | |
| bench_2 | True | False | regressed |
| bench_3 | False | True | newly passing |
```
"""


def __init__(self, out_file):
def __init__(self, out_file, extra_columns=None):
    """Configure the visualization.

    out_file: path to write the Markdown to; '-' means stdout.
    extra_columns: optional mapping of metric name -> list of column
        specs, each with a 'column_name' and a 'text' string containing
        a Python lambda (see the class docstring for the format).
    """
    # Resolve the output destination lazily via benchcomp's Outfile helper.
    self.get_out_file = benchcomp.Outfile(out_file)
    # Compile each column's 'text' string into a callable up front so
    # configuration errors surface before any results are rendered.
    self.extra_columns = self._eval_column_text(extra_columns or {})


@staticmethod
def _eval_column_text(column_spec):
    """Compile each extra column's 'text' string into a callable, in place.

    Each 'text' value is expected to be the source of a Python lambda;
    it is replaced by the result of eval()-ing it. Exits the process on
    a syntax error so bad configuration fails fast rather than at
    render time.

    NOTE(review): eval() executes arbitrary code from the configuration
    file — acceptable only because the config is trusted user input.
    """
    for columns in column_spec.values():
        for column in columns:
            try:
                column["text"] = eval(column["text"])
            except SyntaxError:
                # Report which column text failed, then abort.
                logging.error(
                    "This column text is not a valid python program: '%s'",
                    column["text"])
                sys.exit(1)
    return column_spec


@staticmethod
Expand All @@ -104,10 +168,10 @@ def _get_template():
{% for metric, benchmarks in d["metrics"].items() %}
## {{ metric }}
| Benchmark | {% for variant in d["variants"] %} {{ variant }} |{% endfor %}
| --- | {% for variant in d["variants"] %}--- |{% endfor -%}
| Benchmark | {% for variant in d["variants"][metric] %} {{ variant }} |{% endfor %}
| --- |{% for variant in d["variants"][metric] %} --- |{% endfor -%}
{% for bench_name, bench_variants in benchmarks.items () %}
| {{ bench_name }} {% for variant in d["variants"] -%}
| {{ bench_name }} {% for variant in d["variants"][metric] -%}
| {{ bench_variants[variant] }} {% endfor %}|
{%- endfor %}
{% endfor -%}
Expand All @@ -134,10 +198,35 @@ def _organize_results_into_metrics(results):
return ret


def _add_extra_columns(self, metrics):
for metric, benches in metrics.items():
try:
columns = self.extra_columns[metric]
except KeyError:
continue
for bench, variants in benches.items():
tmp_variants = dict(variants)
for column in columns:
variants[column["column_name"]] = column["text"](tmp_variants)


@staticmethod
def _get_variants(metrics):
ret = {}
for metric, benches in metrics.items():
for bench, variants in benches.items():
ret[metric] = list(variants.keys())
break
return ret


def __call__(self, results):
metrics = self._organize_results_into_metrics(results)
self._add_extra_columns(metrics)

data = {
"metrics": self._organize_results_into_metrics(results),
"variants": list(results["benchmarks"].values())[0]["variants"],
"metrics": metrics,
"variants": self._get_variants(metrics),
}

env = jinja2.Environment(
Expand Down
55 changes: 55 additions & 0 deletions tools/benchcomp/configs/perf-regression.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,61 @@ visualize:

- type: dump_markdown_results_table
out_file: '-'
extra_columns:

# For these two metrics, display the difference between old and new and
# embolden if the absolute difference is more than 10% of the old value
number_vccs:
- column_name: diff old → new
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.1 else "")
+ ("+" if b["kani_new"] > b["kani_old"] else "")
+ str(b["kani_new"] - b["kani_old"])
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.1 else "")
number_program_steps:
- column_name: diff old → new
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.1 else "")
+ ("+" if b["kani_new"] > b["kani_old"] else "")
+ str(b["kani_new"] - b["kani_old"])
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.1 else "")
# For 'runtime' metrics, display the % change from old to new, emboldening
# cells whose absolute change is >50%
solver_runtime:
- column_name: "% change old → new"
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
+ ("+" if b["kani_new"] > b["kani_old"] else "")
+ "%.3f%%" % ((b["kani_new"] - b["kani_old"]) * 100 / b["kani_old"])
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
verification_time:
- column_name: "% change old → new"
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
+ ("+" if b["kani_new"] > b["kani_old"] else "")
+ "%.3f%%" % ((b["kani_new"] - b["kani_old"]) * 100 / b["kani_old"])
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
symex_runtime:
- column_name: "% change old → new"
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
+ ("+" if b["kani_new"] > b["kani_old"] else "")
+ "%.3f%%" % ((b["kani_new"] - b["kani_old"]) * 100 / b["kani_old"])
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
# For success metric, display some text if success has changed
success:
- column_name: change
text: >
lambda b: "" if b["kani_new"] == b["kani_old"]
else "❌ newly failing" if b["kani_old"]
else "✅ newly passing"
- type: error_on_regression
variant_pairs: [[kani_old, kani_new]]
Expand Down
39 changes: 29 additions & 10 deletions tools/benchcomp/test/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,9 +402,10 @@ def test_markdown_results_table(self):
"config": {
"directory": str(tmp),
"command_line":
"mkdir bench_1 bench_2"
"mkdir bench_1 bench_2 bench_3"
"&& echo true > bench_1/success"
"&& echo true > bench_2/success"
"&& echo false > bench_3/success"
"&& echo 5 > bench_1/runtime"
"&& echo 10 > bench_2/runtime"
},
Expand All @@ -413,9 +414,10 @@ def test_markdown_results_table(self):
"config": {
"directory": str(tmp),
"command_line":
"mkdir bench_1 bench_2"
"mkdir bench_1 bench_2 bench_3"
"&& echo true > bench_1/success"
"&& echo false > bench_2/success"
"&& echo true > bench_3/success"
"&& echo 10 > bench_1/runtime"
"&& echo 5 > bench_2/runtime"
}
Expand All @@ -432,6 +434,22 @@ def test_markdown_results_table(self):
"visualize": [{
"type": "dump_markdown_results_table",
"out_file": "-",
"extra_columns": {
"runtime": [{
"column_name": "ratio",
"text":
"lambda b: str(b['variant_2']/b['variant_1'])"
"if b['variant_2'] < 1.5 * b['variant_1'] "
"else '**' + str(b['variant_2']/b['variant_1']) + '**'"
}],
"success": [{
"column_name": "notes",
"text":
"lambda b: '' if b['variant_2'] == b['variant_1']"
"else 'newly passing' if b['variant_2'] "
"else 'regressed'"
}]
}
}]
})
run_bc()
Expand All @@ -441,17 +459,18 @@ def test_markdown_results_table(self):
run_bc.stdout, textwrap.dedent("""
## runtime
| Benchmark | variant_1 | variant_2 |
| --- | --- |--- |
| bench_1 | 5 | 10 |
| bench_2 | 10 | 5 |
| Benchmark | variant_1 | variant_2 | ratio |
| --- | --- | --- | --- |
| bench_1 | 5 | 10 | **2.0** |
| bench_2 | 10 | 5 | 0.5 |
## success
| Benchmark | variant_1 | variant_2 |
| --- | --- |--- |
| bench_1 | True | True |
| bench_2 | True | False |
| Benchmark | variant_1 | variant_2 | notes |
| --- | --- | --- | --- |
| bench_1 | True | True | |
| bench_2 | True | False | regressed |
| bench_3 | False | True | newly passing |
"""))


Expand Down

0 comments on commit 47afab5

Please sign in to comment.