Test outcome analysis: check that all available test cases have been executed #3458

4 changes: 2 additions & 2 deletions docs/architecture/testing/test-framework.md
@@ -22,7 +22,7 @@ Each test case has a description which succinctly describes for a human audience
* Make the description descriptive. “foo: x=2, y=4” is more descriptive than “foo #2”. “foo: 0<x<y, both even” is even better if these inequalities and parities are why this particular test data was chosen.
* Avoid changing the description of an existing test case without a good reason. This breaks the tracking of failures across CI runs, since this tracking is based on the descriptions.

-`tests/scripts/check-test-cases.py` enforces some rules and warns if some guidelines are violated.
+`tests/scripts/check_test_cases.py` enforces some rules and warns if some guidelines are violated.

## TLS tests

@@ -32,7 +32,7 @@ Each test case has a description which succinctly describes for a human audience

Each test case in `ssl-opt.sh` has a description which succinctly describes for a human audience what the test does. The test description is the first parameter to `run_test`.

-The same rules and guidelines apply as for [unit test descriptions](#unit-test-descriptions). In addition, the description must be written on the same line as `run_test`, in double quotes, for the sake of `check-test-cases.py`.
+The same rules and guidelines apply as for [unit test descriptions](#unit-test-descriptions). In addition, the description must be written on the same line as `run_test`, in double quotes, for the sake of `check_test_cases.py`.

## Running tests

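The same-line, double-quoted rule in the hunk above exists so that tooling can locate descriptions textually. As a rough sketch only (this is not the actual pattern used by check_test_cases.py, and the example line is invented), a regular expression along these lines can pull the description out of an ssl-opt.sh test case:

    import re

    # Hypothetical ssl-opt.sh line; run_test and its quoted description
    # sit on the same line, as the guideline above requires.
    line = 'run_test    "TLS: password protected client key" \\'

    match = re.search(r'run_test\s+"([^"]*)"', line)
    if match:
        print(match.group(1))  # -> TLS: password protected client key

A description split across lines, or not in double quotes, would not be found by this kind of textual scan, which is why the guideline is enforced.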
4 changes: 2 additions & 2 deletions tests/scripts/all.sh
@@ -680,7 +680,7 @@ component_check_doxy_blocks () {

component_check_files () {
    msg "Check: file sanity checks (permissions, encodings)" # < 1s
-    record_status tests/scripts/check-files.py
+    record_status tests/scripts/check_files.py
}

component_check_changelog () {
@@ -707,7 +707,7 @@ component_check_test_cases () {
    else
        opt=''
    fi
-    record_status tests/scripts/check-test-cases.py $opt
+    record_status tests/scripts/check_test_cases.py $opt
    unset opt
}

131 changes: 131 additions & 0 deletions tests/scripts/analyze_outcomes.py
@@ -0,0 +1,131 @@
#!/usr/bin/env python3

"""Analyze the test outcomes from a full CI run.

This script can also run on outcomes from a partial run, but the results are
less likely to be useful.
"""

import argparse
import re
import sys
import traceback

import check_test_cases

class Results:
    """Process analysis results."""

    def __init__(self):
        self.error_count = 0
        self.warning_count = 0

    @staticmethod
    def log(fmt, *args, **kwargs):
        sys.stderr.write((fmt + '\n').format(*args, **kwargs))

    def error(self, fmt, *args, **kwargs):
        self.log('Error: ' + fmt, *args, **kwargs)
        self.error_count += 1

    def warning(self, fmt, *args, **kwargs):
        self.log('Warning: ' + fmt, *args, **kwargs)
        self.warning_count += 1

class TestCaseOutcomes:
    """The outcomes of one test case across many configurations."""
    # pylint: disable=too-few-public-methods

    def __init__(self):
        # Collect a list of witnesses of the test case succeeding or failing.
        # Currently we don't do anything with witnesses except count them.
        # The format of a witness is determined by the read_outcome_file
        # function; it's the platform and configuration joined by ';'.
        self.successes = []
        self.failures = []

    def hits(self):
        """Return the number of times a test case has been run.

        This includes passes and failures, but not skips.
        """
        return len(self.successes) + len(self.failures)

class TestDescriptions(check_test_cases.TestDescriptionExplorer):
    """Collect the available test cases."""

    def __init__(self):
        super().__init__()
        self.descriptions = set()

    def process_test_case(self, _per_file_state,
                          file_name, _line_number, description):
        """Record an available test case."""
        base_name = re.sub(r'\.[^.]*$', '', re.sub(r'.*/', '', file_name))
        key = ';'.join([base_name, description.decode('utf-8')])
        self.descriptions.add(key)

def collect_available_test_cases():
    """Collect the available test cases."""
    explorer = TestDescriptions()
    explorer.walk_all()
    return sorted(explorer.descriptions)

def analyze_coverage(results, outcomes):
    """Check that all available test cases are executed at least once."""
    available = collect_available_test_cases()
    for key in available:
        hits = outcomes[key].hits() if key in outcomes else 0
        if hits == 0:
            # Make this a warning, not an error, as long as we haven't
            # fixed this branch to have full coverage of test cases.
            results.warning('Test case not executed: {}', key)

def analyze_outcomes(outcomes):
    """Run all analyses on the given outcome collection."""
    results = Results()
    analyze_coverage(results, outcomes)
    return results

def read_outcome_file(outcome_file):
    """Parse an outcome file and return an outcome collection.

    An outcome collection is a dictionary mapping keys to TestCaseOutcomes objects.
    The keys are the test suite name and the test case description, separated
    by a semicolon.
    """
    outcomes = {}
    with open(outcome_file, 'r', encoding='utf-8') as input_file:
        for line in input_file:
            (platform, config, suite, case, result, _cause) = line.split(';')
            key = ';'.join([suite, case])
            setup = ';'.join([platform, config])
            if key not in outcomes:
                outcomes[key] = TestCaseOutcomes()
            if result == 'PASS':
                outcomes[key].successes.append(setup)
            elif result == 'FAIL':
                outcomes[key].failures.append(setup)
    return outcomes

def analyze_outcome_file(outcome_file):
    """Analyze the given outcome file."""
    outcomes = read_outcome_file(outcome_file)
    return analyze_outcomes(outcomes)

def main():
    try:
        parser = argparse.ArgumentParser(description=__doc__)
        parser.add_argument('outcomes', metavar='OUTCOMES.CSV',
                            help='Outcome file to analyze')
        options = parser.parse_args()
        results = analyze_outcome_file(options.outcomes)
        if results.error_count > 0:
            sys.exit(1)
    except Exception: # pylint: disable=broad-except
        # Print the backtrace and exit explicitly with our chosen status.
        traceback.print_exc()
        sys.exit(120)

if __name__ == '__main__':
    main()
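
To make the outcome file format concrete, here is one record and how the script's key and setup strings are derived from it. This is a minimal sketch mirroring the parsing in read_outcome_file above; the field values are invented for illustration, and the real platform and configuration strings are whatever the CI jobs write:

    # One hypothetical outcome line: platform;config;suite;case;result;cause.
    sample = 'Linux-x86_64;full_config;test_suite_rsa;RSA verify #1;PASS;\n'

    platform, config, suite, case, result, _cause = sample.split(';')

    key = ';'.join([suite, case])         # lookup key used by analyze_coverage
    setup = ';'.join([platform, config])  # witness stored in successes/failures

    print(key)     # test_suite_rsa;RSA verify #1
    print(setup)   # Linux-x86_64;full_config
    print(result)  # PASS

Because the key is built from the suite name and the description, renaming a test case changes its key, which is why the documentation above warns against changing descriptions without a good reason.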
13 changes: 5 additions & 8 deletions tests/scripts/basic-in-docker.sh
@@ -4,8 +4,10 @@
#
# Purpose
# -------
-# This runs a rough equivalent of the travis.yml in a Docker container.
-# The tests are run for both clang and gcc.
+# This runs sanity checks and library tests in a Docker container. The tests
+# are run for both clang and gcc. The testing includes a full test run
+# in the default configuration, partial test runs in the reference
+# configurations, and some dependency tests.
#
# Notes for users
# ---------------
@@ -30,12 +32,7 @@

source tests/scripts/docker_env.sh

-run_in_docker tests/scripts/recursion.pl library/*.c
-run_in_docker tests/scripts/check-generated-files.sh
-run_in_docker tests/scripts/check-doxy-blocks.pl
-run_in_docker tests/scripts/check-names.sh
-run_in_docker tests/scripts/check-files.py
-run_in_docker tests/scripts/doxygen.sh
+run_in_docker tests/scripts/all.sh 'check_*'

for compiler in clang gcc; do
    run_in_docker -e CC=${compiler} cmake -D CMAKE_BUILD_TYPE:String="Check" .
136 changes: 0 additions & 136 deletions tests/scripts/check-test-cases.py

This file was deleted.

File renamed without changes.