From ed4d623ad3894722f4e6a27455e8af3451efdde2 Mon Sep 17 00:00:00 2001
From: paulfouquet <86932794+paulfouquet@users.noreply.github.com>
Date: Thu, 28 Jul 2022 09:13:14 +1200
Subject: [PATCH] fix: log Non Visual QA error report in a JSON format (#55)

* fix: log Non Visual QA error report in a JSON format

* wip

* wip

* wip

* fix: log non-visual-qa as JSON

* fix: raise an exception when gdal output is an error
---
 scripts/gdal_helper.py              |  12 +-
 scripts/non_visual_qa.py            | 214 ++++++++++++++++------------
 scripts/tests/non_visual_qa_test.py |  67 +++++----
 3 files changed, 166 insertions(+), 127 deletions(-)

diff --git a/scripts/gdal_helper.py b/scripts/gdal_helper.py
index 97a4c85a0..0e205d81f 100644
--- a/scripts/gdal_helper.py
+++ b/scripts/gdal_helper.py
@@ -6,6 +6,10 @@
 from linz_logger import get_log
 
 
+class GDALExecutionException(Exception):
+    pass
+
+
 def get_vfs_path(path: str) -> str:
     """Make the path as a GDAL Virtual File Systems path.
 
@@ -68,10 +72,12 @@ def run_gdal(
         proc = subprocess.run(command, env=gdal_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
     except subprocess.CalledProcessError as cpe:
         get_log().error("run_gdal_failed", command=command_to_string(command), error=str(cpe.stderr, "utf-8"))
-        raise cpe
+        raise GDALExecutionException(f"GDAL {str(cpe.stderr, 'utf-8')}") from cpe
+
     if proc.stderr:
         get_log().error("run_gdal_error", command=command_to_string(command), error=proc.stderr.decode())
-        raise Exception(proc.stderr.decode())
-    get_log().debug("run_gdal_succeded", command=command_to_string(command))
+        raise GDALExecutionException(proc.stderr.decode())
+
+    get_log().debug("run_gdal_succeded", command=command_to_string(command), stdout=proc.stdout.decode())
 
     return proc
diff --git a/scripts/non_visual_qa.py b/scripts/non_visual_qa.py
index 572ae7c4f..03c41a4c2 100644
--- a/scripts/non_visual_qa.py
+++ b/scripts/non_visual_qa.py
@@ -1,73 +1,127 @@
 import argparse
 import json
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 
 from file_helper import is_tiff
 from format_source import format_source
-from gdal_helper import run_gdal
+from gdal_helper import GDALExecutionException, run_gdal
 from linz_logger import get_log
 
 
-def check_no_data(gdalinfo: Dict[str, Any], errors_list: List[str]) -> None:
-    """Add an error in errors_list if there is no "noDataValue" or the "noDataValue" is not equal to 255 in the "bands".
-
-    Args:
-        gdalinfo (Dict[str, Any]): JSON return of gdalinfo in a Python Dictionary.
-        errors_list (List[str]): List of errors as strings.
-    """
-    bands = gdalinfo["bands"]
-    if "noDataValue" in bands[0]:
-        current_nodata_val = bands[0]["noDataValue"]
-        if current_nodata_val != 255:
-            errors_list.append(f"noDataValue is {int(current_nodata_val)} not 255")
-    else:
-        errors_list.append("noDataValue not set")
-
-
-def check_band_count(gdalinfo: Dict[str, Any], errors_list: List[str]) -> None:
-    """Add an error in errors_list if there is no exactly 3 bands found.
-
-    Args:
-        gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary.
-        errors_list (List[str]): List of errors as strings.
-    """
-    bands = gdalinfo["bands"]
-    bands_num = len(bands)
-    if bands_num != 3:
-        errors_list.append(f"not 3 bands, {bands_num} bands found")
-
-
-def check_srs(gdalsrsinfo: bytes, gdalsrsinfo_tif: bytes, errors_list: List[str]) -> None:
-    """Add an error in errors_list if gdalsrsinfo and gdalsrsinfo_tif values are different.
-
-    Args:
-        gdalsrsinfo (str): Value returned by gdalsrsinfo as a string.
-        gdalsrsinfo_tif (str): Value returned by gdalsrsinfo for the tif as a string.
-        errors_list (List[str]): List of errors as strings.
-    """
-    if gdalsrsinfo_tif != gdalsrsinfo:
-        errors_list.append("different srs")
-
-
-def check_color_interpretation(gdalinfo: Dict[str, Any], errors_list: List[str]) -> None:
-    bands = gdalinfo["bands"]
-    missing_bands = []
-    band_colour_ints = {1: "Red", 2: "Green", 3: "Blue"}
-    n = 1
-    for band in bands:
-        colour_int = band["colorInterpretation"]
-        if n in band_colour_ints:
-            if colour_int != band_colour_ints[n]:
-                missing_bands.append(f"band {n} {colour_int}")
+class FileCheck:
+    def __init__(self, path: str, srs: bytes) -> None:
+        self.path = path
+        self.global_srs = srs
+        self.errors: List[Dict[str, Any]] = []
+        self._valid = True
+
+    def add_error(self, error_type: str, error_message: str, custom_fields: Optional[Dict[str, str]] = None) -> None:
+        if not custom_fields:
+            custom_fields = {}
+        self.errors.append({"type": error_type, "message": error_message, **custom_fields})
+        self._valid = False
+
+    def is_valid(self) -> bool:
+        return self._valid
+
+    def check_no_data(self, gdalinfo: Dict[str, Any]) -> None:
+        """Add an error if there is no "noDataValue" or the "noDataValue" is not equal to 255 in the "bands".
+
+        Args:
+            gdalinfo (Dict[str, Any]): JSON return of gdalinfo in a Python Dictionary.
+        """
+        bands = gdalinfo["bands"]
+        if "noDataValue" in bands[0]:
+            current_nodata_val = bands[0]["noDataValue"]
+            if current_nodata_val != 255:
+                self.add_error(
+                    error_type="nodata",
+                    error_message="noDataValue is not 255",
+                    custom_fields={"current": f"{int(current_nodata_val)}"},
+                )
         else:
-            missing_bands.append(f"band {n} {colour_int}")
-        n += 1
-    if missing_bands:
-        missing_bands.sort()
-        errors_list.append(f"unexpected color interpretation bands; {', '.join(missing_bands)}")
-
-
-def main() -> None:
+            self.add_error(error_type="nodata", error_message="noDataValue not set")
+
+    def check_band_count(self, gdalinfo: Dict[str, Any]) -> None:
+        """Add an error if there is no exactly 3 bands found.
+
+        Args:
+            gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary.
+        """
+        bands = gdalinfo["bands"]
+        bands_num = len(bands)
+        if bands_num != 3:
+            self.add_error(
+                error_type="bands", error_message="bands count is not 3", custom_fields={"count": f"{int(bands_num)}"}
+            )
+
+    def check_srs(self, gdalsrsinfo_tif: bytes) -> None:
+        """Add an error if gdalsrsinfo and gdalsrsinfo_tif values are different.
+
+        Args:
+            gdalsrsinfo (str): Value returned by gdalsrsinfo as a string.
+            gdalsrsinfo_tif (str): Value returned by gdalsrsinfo for the tif as a string.
+        """
+        if gdalsrsinfo_tif != self.global_srs:
+            self.add_error(error_type="srs", error_message="different srs")
+
+    def check_color_interpretation(self, gdalinfo: Dict[str, Any]) -> None:
+        """Add an error if the colors don't match RGB.
+
+        Args:
+            gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary.
+        """
+        bands = gdalinfo["bands"]
+        missing_bands = []
+        band_colour_ints = {1: "Red", 2: "Green", 3: "Blue"}
+        n = 1
+        for band in bands:
+            colour_int = band["colorInterpretation"]
+            if n in band_colour_ints:
+                if colour_int != band_colour_ints[n]:
+                    missing_bands.append(f"band {n} {colour_int}")
+            else:
+                missing_bands.append(f"band {n} {colour_int}")
+            n += 1
+        if missing_bands:
+            missing_bands.sort()
+            self.add_error(
+                error_type="color",
+                error_message="unexpected color interpretation bands",
+                custom_fields={"missing": f"{', '.join(missing_bands)}"},
+            )
+
+    def run(self) -> None:
+        gdalinfo_success = True
+        gdalinfo_command = ["gdalinfo", "-stats", "-json"]
+        try:
+            gdalinfo_process = run_gdal(gdalinfo_command, self.path)
+            gdalinfo_result = {}
+            try:
+                gdalinfo_result = json.loads(gdalinfo_process.stdout)
+            except json.JSONDecodeError as e:
+                get_log().error("load_gdalinfo_result_error", file=self.path, error=e)
+                self.add_error(error_type="gdalinfo", error_message=f"parsing result issue: {str(e)}")
+                gdalinfo_success = False
+            if gdalinfo_process.stderr:
+                self.add_error(error_type="gdalinfo", error_message=f"error(s): {str(gdalinfo_process.stderr)}")
+        except GDALExecutionException as gee:
+            self.add_error(error_type="gdalinfo", error_message=f"failed: {str(gee)}")
+            gdalinfo_success = False
+
+        if gdalinfo_success:
+            self.check_no_data(gdalinfo_result)
+            self.check_band_count(gdalinfo_result)
+            self.check_color_interpretation(gdalinfo_result)
+            gdalsrsinfo_tif_command = ["gdalsrsinfo", "-o", "wkt"]
+            try:
+                gdalsrsinfo_tif_result = run_gdal(gdalsrsinfo_tif_command, self.path)
+                self.check_srs(gdalsrsinfo_tif_result.stdout)
+            except GDALExecutionException as gee:
+                self.add_error(error_type="srs", error_message=f"not checked: {str(gee)}")
+
+
+def main() -> None:  # pylint: disable=too-many-locals
     parser = argparse.ArgumentParser()
     parser.add_argument("--source", dest="source", nargs="+", required=True)
     arguments = parser.parse_args()
@@ -88,41 +142,13 @@ def main() -> None:
         if not is_tiff(file):
             get_log().trace("non_visual_qa_file_not_tiff_skipped", file=file)
             continue
+        file_check = FileCheck(file, srs)
+        file_check.run()
 
-        gdalinfo_command = ["gdalinfo", "-stats", "-json"]
-        gdalinfo_process = run_gdal(gdalinfo_command, file)
-        gdalinfo_result = {}
-        try:
-            gdalinfo_result = json.loads(gdalinfo_process.stdout)
-        except json.JSONDecodeError as e:
-            get_log().error("load_gdalinfo_result_error", file=file, error=e)
-            continue
-
-        gdalinfo_errors = gdalinfo_process.stderr
-
-        # Check result
-        errors: List[str] = []
-        # No data
-        check_no_data(gdalinfo_result, errors)
-
-        # Band count
-        check_band_count(gdalinfo_result, errors)
-
-        # srs
-        gdalsrsinfo_tif_command = ["gdalsrsinfo", "-o", "wkt"]
-        gdalsrsinfo_tif_result = run_gdal(gdalsrsinfo_tif_command, file)
-        check_srs(srs, gdalsrsinfo_tif_result.stdout, errors)
-
-        # Color interpretation
-        check_color_interpretation(gdalinfo_result, errors)
-
-        # gdal errors
-        errors.append(f"{gdalinfo_errors!r}")
-
-        if len(errors) > 0:
-            get_log().info("non_visual_qa_errors_found", file=file, result=errors)
+        if not file_check.is_valid():
+            get_log().info("non_visual_qa_errors", file=file_check.path, errors=file_check.errors)
         else:
-            get_log().info("non_visual_qa_no_error", file=file)
+            get_log().info("non_visual_qa_passed", file=file_check.path)
 
 
 if __name__ == "__main__":
diff --git a/scripts/tests/non_visual_qa_test.py b/scripts/tests/non_visual_qa_test.py
index 2711155c1..c682fbb0c 100644
--- a/scripts/tests/non_visual_qa_test.py
+++ b/scripts/tests/non_visual_qa_test.py
@@ -1,6 +1,4 @@
-from typing import List
-
-from non_visual_qa import check_band_count, check_color_interpretation, check_no_data, check_srs
+from non_visual_qa import FileCheck
 
 
 def test_check_band_count_valid() -> None:
@@ -10,10 +8,11 @@ def test_check_band_count_valid() -> None:
     """
     gdalinfo = {}
     gdalinfo["bands"] = [{"band": 1}, {"band": 2}, {"band": 3}]
-    errors: List[str] = []
 
-    check_band_count(gdalinfo, errors)
-    assert len(errors) == 0
+    file_check = FileCheck("test", b"srs")
+    file_check.check_band_count(gdalinfo)
+
+    assert not file_check.errors
 
 
 def test_check_band_count_invalid() -> None:
@@ -23,10 +22,11 @@ def test_check_band_count_invalid() -> None:
     """
     gdalinfo = {}
     gdalinfo["bands"] = [{"band": 1}, {"band": 2}]
-    errors: List[str] = []
 
-    check_band_count(gdalinfo, errors)
-    assert len(errors) == 1
+    file_check = FileCheck("test", b"srs")
+    file_check.check_band_count(gdalinfo)
+
+    assert file_check.errors
 
 
 def test_check_color_interpretation_valid() -> None:
@@ -45,10 +45,11 @@ def test_check_color_interpretation_valid() -> None:
             "colorInterpretation": "Blue",
         },
     ]
-    errors: List[str] = []
 
-    check_color_interpretation(gdalinfo, errors)
-    assert len(errors) == 0
+    file_check = FileCheck("test", b"srs")
+    file_check.check_color_interpretation(gdalinfo)
+
+    assert not file_check.errors
 
 
 def test_check_color_interpretation_invalid() -> None:
@@ -70,10 +71,11 @@ def test_check_color_interpretation_invalid() -> None:
             "colorInterpretation": "Undefined",
         },
     ]
-    errors: List[str] = []
 
-    check_color_interpretation(gdalinfo, errors)
-    assert len(errors) == 1
+    file_check = FileCheck("test", b"srs")
+    file_check.check_color_interpretation(gdalinfo)
+
+    assert file_check.errors
 
 
 def test_check_no_data_valid() -> None:
@@ -86,10 +88,11 @@ def test_check_no_data_valid() -> None:
             "noDataValue": 255,
         }
     ]
-    errors: List[str] = []
 
-    check_no_data(gdalinfo, errors)
-    assert len(errors) == 0
+    file_check = FileCheck("test", b"srs")
+    file_check.check_no_data(gdalinfo)
+
+    assert not file_check.errors
 
 
 def test_check_no_data_no_value() -> None:
@@ -98,10 +101,11 @@ def test_check_no_data_no_value() -> None:
     """
     gdalinfo = {}
     gdalinfo["bands"] = [{"test": 1}]
-    errors: List[str] = []
 
-    check_no_data(gdalinfo, errors)
-    assert len(errors) == 1
+    file_check = FileCheck("test", b"srs")
+    file_check.check_no_data(gdalinfo)
+
+    assert file_check.errors
 
 
 def test_check_no_data_invalid_value() -> None:
@@ -114,10 +118,11 @@ def test_check_no_data_invalid_value() -> None:
             "noDataValue": 0,
         }
     ]
-    errors: List[str] = []
 
-    check_no_data(gdalinfo, errors)
-    assert len(errors) == 1
+    file_check = FileCheck("test", b"srs")
+    file_check.check_no_data(gdalinfo)
+
+    assert file_check.errors
 
 
 def test_check_srs_valid() -> None:
@@ -126,10 +131,11 @@ def test_check_srs_valid() -> None:
     """
     srs_to_test_against = b"SRS Test"
     srs_tif = b"SRS Test"
-    errors: List[str] = []
 
-    check_srs(srs_to_test_against, srs_tif, errors)
-    assert len(errors) == 0
+    file_check = FileCheck("test", srs_to_test_against)
+    file_check.check_srs(srs_tif)
+
+    assert not file_check.errors
 
 
 def test_check_srs_invalid() -> None:
@@ -138,7 +144,8 @@ def test_check_srs_invalid() -> None:
     """
     srs_to_test_against = b"SRS Test"
     srs_tif = b"SRS Different"
-    errors: List[str] = []
 
-    check_srs(srs_to_test_against, srs_tif, errors)
-    assert len(errors) == 1
+    file_check = FileCheck("test", srs_to_test_against)
+    file_check.check_srs(srs_tif)
+
+    assert file_check.errors