Add markdown_results_table benchcomp visualization (#2413)

This commit adds a new visualization that writes all results out to a file as a series of tables, one for each metric. For each metric, each row comprises the benchmark name, followed by the values of the metric for each of the variants.

This is an example of the output:

```
## runtime

| Benchmark | variant_1 | variant_2 |
| --- | --- |--- |
| bench_1 | 5 | 10 |
| bench_2 | 10 | 5 |

## success

| Benchmark | variant_1 | variant_2 |
| --- | --- |--- |
| bench_1 | True | True |
| bench_2 | True | False |
```
model-checking · Apr 27, 2023 · d045764 · d045764
1 parent 6f57d12
commit d045764
Show file tree

Hide file tree

Showing 4 changed files with 157 additions and 2 deletions.
diff --git a/.github/workflows/kani.yml b/.github/workflows/kani.yml
@@ -306,4 +306,20 @@ jobs:
       - name: Run benchcomp
         run: |
           new/tools/benchcomp/bin/benchcomp \
-            --config new/tools/benchcomp/configs/perf-regression.yaml
+            --config new/tools/benchcomp/configs/perf-regression.yaml \
+            run
+          new/tools/benchcomp/bin/benchcomp \
+            --config new/tools/benchcomp/configs/perf-regression.yaml \
+            collate
+
+      - name: Perf Regression Results Table
+        run: |
+          new/tools/benchcomp/bin/benchcomp \
+            --config new/tools/benchcomp/configs/perf-regression.yaml \
+            visualize --only dump_markdown_results_table >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Run other visualizations
+        run: |
+          new/tools/benchcomp/bin/benchcomp \
+            --config new/tools/benchcomp/configs/perf-regression.yaml \
+            visualize --except dump_markdown_results_table
diff --git a/tools/benchcomp/benchcomp/visualizers/__init__.py b/tools/benchcomp/benchcomp/visualizers/__init__.py
@@ -3,7 +3,9 @@
 
 
 import dataclasses
+import textwrap
 
+import jinja2
 import yaml
 
 import benchcomp
@@ -76,3 +78,73 @@ def __call__(self, results):
         with self.get_out_file() as handle:
             print(
                 yaml.dump(results, default_flow_style=False), file=handle)
+
+
+
+class dump_markdown_results_table:
+    """Print a Markdown-formatted table displaying benchmark results
+
+    One table is emitted per metric. Each row of a table holds a benchmark
+    name followed by the value of that metric for every variant.
+
+    The 'out_file' key is mandatory; specify '-' to print to stdout.
+
+    Sample configuration:
+
+    visualize:
+    - type: dump_markdown_results_table
+      out_file: '-'
+    """
+
+
+    def __init__(self, out_file):
+        # Calling the stored object yields a context manager wrapping the
+        # output handle (see __call__).
+        self.get_out_file = benchcomp.Outfile(out_file)
+
+
+    @staticmethod
+    def _get_template():
+        """Return the Jinja2 template source used to render the tables
+
+        The template expects a single variable `d` with two keys:
+        "metrics", mapping metric -> benchmark -> variant -> value, and
+        "variants", an iterable of variant names used as column headers.
+        """
+        return textwrap.dedent("""\
+            {% for metric, benchmarks in d["metrics"].items() %}
+            ## {{ metric }}
+
+            | Benchmark | {% for variant in d["variants"] %} {{ variant }} |{% endfor %}
+            | --- | {% for variant in d["variants"] %}--- |{% endfor -%}
+            {% for bench_name, bench_variants in benchmarks.items () %}
+            | {{ bench_name }} {% for variant in d["variants"] -%}
+             | {{ bench_variants[variant] }} {% endfor %}|
+            {%- endfor %}
+            {% endfor -%}
+            """)
+
+
+    @staticmethod
+    def _get_variant_names(results):
+        """Return the variant names of the first benchmark in `results`
+
+        The names are returned as a list, in the order in which they appear
+        under that benchmark's "variants" key. An empty list is returned
+        when `results` contains no benchmarks.
+        """
+        # Dict views cannot be indexed on Python 3, so pull the first value
+        # through an iterator instead of writing `.values()[0]`.
+        first_bench = next(iter(results["benchmarks"].values()), None)
+        if first_bench is None:
+            return []
+        return list(first_bench["variants"])
+
+
+    @staticmethod
+    def _organize_results_into_metrics(results):
+        """Regroup `results` as {metric: {benchmark: {variant: value}}}
+
+        Every metric listed in results["metrics"] gets an entry, even when
+        no benchmark reports a value for it. A KeyError is raised if a
+        variant reports a metric that is absent from results["metrics"].
+        """
+        ret = {metric: {} for metric in results["metrics"]}
+        for bench, bench_result in results["benchmarks"].items():
+            for variant, variant_result in bench_result["variants"].items():
+                for metric, value in variant_result["metrics"].items():
+                    # Create the per-benchmark dict on first sight
+                    ret[metric].setdefault(bench, {})[variant] = value
+        return ret
+
+
+    def __call__(self, results):
+        """Render `results` as Markdown tables and write them to the out file"""
+        data = {
+            "metrics": self._organize_results_into_metrics(results),
+            "variants": self._get_variant_names(results),
+        }
+
+        env = jinja2.Environment(
+            loader=jinja2.BaseLoader, autoescape=jinja2.select_autoescape(
+                # One-element tuple: a bare ("html") is just the string "html"
+                enabled_extensions=("html",),
+                default_for_string=True))
+        template = env.from_string(self._get_template())
+        # Drop the final character of the rendering (expected to be the
+        # newline closing the last table) because print() appends its own.
+        output = template.render(d=data)[:-1]
+        with self.get_out_file() as handle:
+            print(output, file=handle)
diff --git a/tools/benchcomp/configs/perf-regression.yaml b/tools/benchcomp/configs/perf-regression.yaml
@@ -29,7 +29,10 @@ run:
 
 visualize:
   - type: dump_yaml
-    out_file: '/tmp/result.yaml'
+    out_file: '-'
+
+  - type: dump_markdown_results_table
+    out_file: '-'
 
   - type: error_on_regression
     variant_pairs: [[kani_old, kani_new]]

diff --git a/tools/benchcomp/test/test_regression.py b/tools/benchcomp/test/test_regression.py
@@ -8,6 +8,7 @@
 import pathlib
 import subprocess
 import tempfile
+import textwrap
 import unittest
 
 import yaml
@@ -391,6 +392,69 @@ def test_error_on_regression_visualization_ratio_regressed(self):
                 run_bc.proc.returncode, 1, msg=run_bc.stderr)
 
 
+    def test_markdown_results_table(self):
+        """Check the tables printed by dump_markdown_results_table"""
+
+        # Expected stdout: one table per metric, one column per variant
+        expected = textwrap.dedent("""
+                    ## runtime
+
+                    | Benchmark |  variant_1 | variant_2 |
+                    | --- | --- |--- |
+                    | bench_1 | 5 | 10 |
+                    | bench_2 | 10 | 5 |
+
+                    ## success
+
+                    | Benchmark |  variant_1 | variant_2 |
+                    | --- | --- |--- |
+                    | bench_1 | True | True |
+                    | bench_2 | True | False |
+                    """)
+
+        with tempfile.TemporaryDirectory() as tmp:
+            # Each variant writes one file per metric into a directory
+            # named after the benchmark.
+            variant_1 = {
+                "config": {
+                    "directory": str(tmp),
+                    "command_line":
+                        "mkdir bench_1 bench_2"
+                        "&& echo true > bench_1/success"
+                        "&& echo true > bench_2/success"
+                        "&& echo 5 > bench_1/runtime"
+                        "&& echo 10 > bench_2/runtime"
+                },
+            }
+            variant_2 = {
+                "config": {
+                    "directory": str(tmp),
+                    "command_line":
+                        "mkdir bench_1 bench_2"
+                        "&& echo true > bench_1/success"
+                        "&& echo false > bench_2/success"
+                        "&& echo 10 > bench_1/runtime"
+                        "&& echo 5 > bench_2/runtime"
+                }
+            }
+            suite_1 = {
+                "parser": { "module": "test_file_to_metric" },
+                "variants": ["variant_1", "variant_2"]
+            }
+            visualization = {
+                "type": "dump_markdown_results_table",
+                "out_file": "-",
+            }
+
+            run_bc = Benchcomp({
+                "variants": {
+                    "variant_1": variant_1,
+                    "variant_2": variant_2,
+                },
+                "run": {"suites": {"suite_1": suite_1}},
+                "visualize": [visualization],
+            })
+            run_bc()
+
+            self.assertEqual(run_bc.proc.returncode, 0, msg=run_bc.stderr)
+            self.assertEqual(run_bc.stdout, expected)
+
+
     def test_only_dump_yaml(self):
         """Ensure that benchcomp terminates with return code 0 when `--only dump_yaml` is passed, even if the error_on_regression visualization would have resulted in a return code of 1"""