From ed4d623ad3894722f4e6a27455e8af3451efdde2 Mon Sep 17 00:00:00 2001 From: paulfouquet <86932794+paulfouquet@users.noreply.github.com> Date: Thu, 28 Jul 2022 09:13:14 +1200 Subject: [PATCH] fix: log Non Visual QA error report in a JSON format (#55) * fix: log Non Visual QA error report in a JSON format * wip * wip * wip * fix: log non-visual-qa as JSON * fix: raise an exception when gdal output is an error --- scripts/gdal_helper.py | 12 +- scripts/non_visual_qa.py | 214 ++++++++++++++++------------ scripts/tests/non_visual_qa_test.py | 67 +++++---- 3 files changed, 166 insertions(+), 127 deletions(-) diff --git a/scripts/gdal_helper.py b/scripts/gdal_helper.py index 97a4c85a0..0e205d81f 100644 --- a/scripts/gdal_helper.py +++ b/scripts/gdal_helper.py @@ -6,6 +6,10 @@ from linz_logger import get_log +class GDALExecutionException(Exception): + pass + + def get_vfs_path(path: str) -> str: """Make the path as a GDAL Virtual File Systems path. @@ -68,10 +72,12 @@ def run_gdal( proc = subprocess.run(command, env=gdal_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) except subprocess.CalledProcessError as cpe: get_log().error("run_gdal_failed", command=command_to_string(command), error=str(cpe.stderr, "utf-8")) - raise cpe + raise GDALExecutionException(f"GDAL {str(cpe.stderr, 'utf-8')}") from cpe + if proc.stderr: get_log().error("run_gdal_error", command=command_to_string(command), error=proc.stderr.decode()) - raise Exception(proc.stderr.decode()) - get_log().debug("run_gdal_succeded", command=command_to_string(command)) + raise GDALExecutionException(proc.stderr.decode()) + + get_log().debug("run_gdal_succeded", command=command_to_string(command), stdout=proc.stdout.decode()) return proc diff --git a/scripts/non_visual_qa.py b/scripts/non_visual_qa.py index 572ae7c4f..03c41a4c2 100644 --- a/scripts/non_visual_qa.py +++ b/scripts/non_visual_qa.py @@ -1,73 +1,127 @@ import argparse import json -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from file_helper import is_tiff from format_source import format_source -from gdal_helper import run_gdal +from gdal_helper import GDALExecutionException, run_gdal from linz_logger import get_log -def check_no_data(gdalinfo: Dict[str, Any], errors_list: List[str]) -> None: - """Add an error in errors_list if there is no "noDataValue" or the "noDataValue" is not equal to 255 in the "bands". - - Args: - gdalinfo (Dict[str, Any]): JSON return of gdalinfo in a Python Dictionary. - errors_list (List[str]): List of errors as strings. - """ - bands = gdalinfo["bands"] - if "noDataValue" in bands[0]: - current_nodata_val = bands[0]["noDataValue"] - if current_nodata_val != 255: - errors_list.append(f"noDataValue is {int(current_nodata_val)} not 255") - else: - errors_list.append("noDataValue not set") - - -def check_band_count(gdalinfo: Dict[str, Any], errors_list: List[str]) -> None: - """Add an error in errors_list if there is no exactly 3 bands found. - - Args: - gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary. - errors_list (List[str]): List of errors as strings. - """ - bands = gdalinfo["bands"] - bands_num = len(bands) - if bands_num != 3: - errors_list.append(f"not 3 bands, {bands_num} bands found") - - -def check_srs(gdalsrsinfo: bytes, gdalsrsinfo_tif: bytes, errors_list: List[str]) -> None: - """Add an error in errors_list if gdalsrsinfo and gdalsrsinfo_tif values are different. - - Args: - gdalsrsinfo (str): Value returned by gdalsrsinfo as a string. - gdalsrsinfo_tif (str): Value returned by gdalsrsinfo for the tif as a string. - errors_list (List[str]): List of errors as strings. - """ - if gdalsrsinfo_tif != gdalsrsinfo: - errors_list.append("different srs") - - -def check_color_interpretation(gdalinfo: Dict[str, Any], errors_list: List[str]) -> None: - bands = gdalinfo["bands"] - missing_bands = [] - band_colour_ints = {1: "Red", 2: "Green", 3: "Blue"} - n = 1 - for band in bands: - colour_int = band["colorInterpretation"] - if n in band_colour_ints: - if colour_int != band_colour_ints[n]: - missing_bands.append(f"band {n} {colour_int}") +class FileCheck: + def __init__(self, path: str, srs: bytes) -> None: + self.path = path + self.global_srs = srs + self.errors: List[Dict[str, Any]] = [] + self._valid = True + + def add_error(self, error_type: str, error_message: str, custom_fields: Optional[Dict[str, str]] = None) -> None: + if not custom_fields: + custom_fields = {} + self.errors.append({"type": error_type, "message": error_message, **custom_fields}) + self._valid = False + + def is_valid(self) -> bool: + return self._valid + + def check_no_data(self, gdalinfo: Dict[str, Any]) -> None: + """Add an error if there is no "noDataValue" or the "noDataValue" is not equal to 255 in the "bands". + + Args: + gdalinfo (Dict[str, Any]): JSON return of gdalinfo in a Python Dictionary. + """ + bands = gdalinfo["bands"] + if "noDataValue" in bands[0]: + current_nodata_val = bands[0]["noDataValue"] + if current_nodata_val != 255: + self.add_error( + error_type="nodata", + error_message="noDataValue is not 255", + custom_fields={"current": f"{int(current_nodata_val)}"}, + ) else: - missing_bands.append(f"band {n} {colour_int}") - n += 1 - if missing_bands: - missing_bands.sort() - errors_list.append(f"unexpected color interpretation bands; {', '.join(missing_bands)}") - - -def main() -> None: + self.add_error(error_type="nodata", error_message="noDataValue not set") + + def check_band_count(self, gdalinfo: Dict[str, Any]) -> None: + """Add an error if there is no exactly 3 bands found. + + Args: + gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary. + """ + bands = gdalinfo["bands"] + bands_num = len(bands) + if bands_num != 3: + self.add_error( + error_type="bands", error_message="bands count is not 3", custom_fields={"count": f"{int(bands_num)}"} + ) + + def check_srs(self, gdalsrsinfo_tif: bytes) -> None: + """Add an error if gdalsrsinfo and gdalsrsinfo_tif values are different. + + Args: + gdalsrsinfo (str): Value returned by gdalsrsinfo as a string. + gdalsrsinfo_tif (str): Value returned by gdalsrsinfo for the tif as a string. + """ + if gdalsrsinfo_tif != self.global_srs: + self.add_error(error_type="srs", error_message="different srs") + + def check_color_interpretation(self, gdalinfo: Dict[str, Any]) -> None: + """Add an error if the colors don't match RGB. + + Args: + gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary. + """ + bands = gdalinfo["bands"] + missing_bands = [] + band_colour_ints = {1: "Red", 2: "Green", 3: "Blue"} + n = 1 + for band in bands: + colour_int = band["colorInterpretation"] + if n in band_colour_ints: + if colour_int != band_colour_ints[n]: + missing_bands.append(f"band {n} {colour_int}") + else: + missing_bands.append(f"band {n} {colour_int}") + n += 1 + if missing_bands: + missing_bands.sort() + self.add_error( + error_type="color", + error_message="unexpected color interpretation bands", + custom_fields={"missing": f"{', '.join(missing_bands)}"}, + ) + + def run(self) -> None: + gdalinfo_success = True + gdalinfo_command = ["gdalinfo", "-stats", "-json"] + try: + gdalinfo_process = run_gdal(gdalinfo_command, self.path) + gdalinfo_result = {} + try: + gdalinfo_result = json.loads(gdalinfo_process.stdout) + except json.JSONDecodeError as e: + get_log().error("load_gdalinfo_result_error", file=self.path, error=e) + self.add_error(error_type="gdalinfo", error_message=f"parsing result issue: {str(e)}") + gdalinfo_success = False + if gdalinfo_process.stderr: + self.add_error(error_type="gdalinfo", error_message=f"error(s): {str(gdalinfo_process.stderr)}") + except GDALExecutionException as gee: + self.add_error(error_type="gdalinfo", error_message=f"failed: {str(gee)}") + gdalinfo_success = False + + if gdalinfo_success: + self.check_no_data(gdalinfo_result) + self.check_band_count(gdalinfo_result) + self.check_color_interpretation(gdalinfo_result) + gdalsrsinfo_tif_command = ["gdalsrsinfo", "-o", "wkt"] + try: + gdalsrsinfo_tif_result = run_gdal(gdalsrsinfo_tif_command, self.path) + self.check_srs(gdalsrsinfo_tif_result.stdout) + except GDALExecutionException as gee: + self.add_error(error_type="srs", error_message=f"not checked: {str(gee)}") + + +def main() -> None: # pylint: disable=too-many-locals parser = argparse.ArgumentParser() parser.add_argument("--source", dest="source", nargs="+", required=True) arguments = parser.parse_args() @@ -88,41 +142,13 @@ def main() -> None: if not is_tiff(file): get_log().trace("non_visual_qa_file_not_tiff_skipped", file=file) continue + file_check = FileCheck(file, srs) + file_check.run() - gdalinfo_command = ["gdalinfo", "-stats", "-json"] - gdalinfo_process = run_gdal(gdalinfo_command, file) - gdalinfo_result = {} - try: - gdalinfo_result = json.loads(gdalinfo_process.stdout) - except json.JSONDecodeError as e: - get_log().error("load_gdalinfo_result_error", file=file, error=e) - continue - - gdalinfo_errors = gdalinfo_process.stderr - - # Check result - errors: List[str] = [] - # No data - check_no_data(gdalinfo_result, errors) - - # Band count - check_band_count(gdalinfo_result, errors) - - # srs - gdalsrsinfo_tif_command = ["gdalsrsinfo", "-o", "wkt"] - gdalsrsinfo_tif_result = run_gdal(gdalsrsinfo_tif_command, file) - check_srs(srs, gdalsrsinfo_tif_result.stdout, errors) - - # Color interpretation - check_color_interpretation(gdalinfo_result, errors) - - # gdal errors - errors.append(f"{gdalinfo_errors!r}") - - if len(errors) > 0: - get_log().info("non_visual_qa_errors_found", file=file, result=errors) + if not file_check.is_valid(): + get_log().info("non_visual_qa_errors", file=file_check.path, errors=file_check.errors) else: - get_log().info("non_visual_qa_no_error", file=file) + get_log().info("non_visual_qa_passed", file=file_check.path) if __name__ == "__main__": diff --git a/scripts/tests/non_visual_qa_test.py b/scripts/tests/non_visual_qa_test.py index 2711155c1..c682fbb0c 100644 --- a/scripts/tests/non_visual_qa_test.py +++ b/scripts/tests/non_visual_qa_test.py @@ -1,6 +1,4 @@ -from typing import List - -from non_visual_qa import check_band_count, check_color_interpretation, check_no_data, check_srs +from non_visual_qa import FileCheck def test_check_band_count_valid() -> None: @@ -10,10 +8,11 @@ def test_check_band_count_valid() -> None: """ gdalinfo = {} gdalinfo["bands"] = [{"band": 1}, {"band": 2}, {"band": 3}] - errors: List[str] = [] - check_band_count(gdalinfo, errors) - assert len(errors) == 0 + file_check = FileCheck("test", b"srs") + file_check.check_band_count(gdalinfo) + + assert not file_check.errors def test_check_band_count_invalid() -> None: @@ -23,10 +22,11 @@ def test_check_band_count_invalid() -> None: """ gdalinfo = {} gdalinfo["bands"] = [{"band": 1}, {"band": 2}] - errors: List[str] = [] - check_band_count(gdalinfo, errors) - assert len(errors) == 1 + file_check = FileCheck("test", b"srs") + file_check.check_band_count(gdalinfo) + + assert file_check.errors def test_check_color_interpretation_valid() -> None: @@ -45,10 +45,11 @@ def test_check_color_interpretation_valid() -> None: "colorInterpretation": "Blue", }, ] - errors: List[str] = [] - check_color_interpretation(gdalinfo, errors) - assert len(errors) == 0 + file_check = FileCheck("test", b"srs") + file_check.check_color_interpretation(gdalinfo) + + assert not file_check.errors def test_check_color_interpretation_invalid() -> None: @@ -70,10 +71,11 @@ def test_check_color_interpretation_invalid() -> None: "colorInterpretation": "Undefined", }, ] - errors: List[str] = [] - check_color_interpretation(gdalinfo, errors) - assert len(errors) == 1 + file_check = FileCheck("test", b"srs") + file_check.check_color_interpretation(gdalinfo) + + assert file_check.errors def test_check_no_data_valid() -> None: @@ -86,10 +88,11 @@ def test_check_no_data_valid() -> None: "noDataValue": 255, } ] - errors: List[str] = [] - check_no_data(gdalinfo, errors) - assert len(errors) == 0 + file_check = FileCheck("test", b"srs") + file_check.check_no_data(gdalinfo) + + assert not file_check.errors def test_check_no_data_no_value() -> None: @@ -98,10 +101,11 @@ def test_check_no_data_no_value() -> None: """ gdalinfo = {} gdalinfo["bands"] = [{"test": 1}] - errors: List[str] = [] - check_no_data(gdalinfo, errors) - assert len(errors) == 1 + file_check = FileCheck("test", b"srs") + file_check.check_no_data(gdalinfo) + + assert file_check.errors def test_check_no_data_invalid_value() -> None: @@ -114,10 +118,11 @@ def test_check_no_data_invalid_value() -> None: "noDataValue": 0, } ] - errors: List[str] = [] - check_no_data(gdalinfo, errors) - assert len(errors) == 1 + file_check = FileCheck("test", b"srs") + file_check.check_no_data(gdalinfo) + + assert file_check.errors def test_check_srs_valid() -> None: @@ -126,10 +131,11 @@ def test_check_srs_valid() -> None: """ srs_to_test_against = b"SRS Test" srs_tif = b"SRS Test" - errors: List[str] = [] - check_srs(srs_to_test_against, srs_tif, errors) - assert len(errors) == 0 + file_check = FileCheck("test", srs_to_test_against) + file_check.check_srs(srs_tif) + + assert not file_check.errors def test_check_srs_invalid() -> None: @@ -138,7 +144,8 @@ def test_check_srs_invalid() -> None: """ srs_to_test_against = b"SRS Test" srs_tif = b"SRS Different" - errors: List[str] = [] - check_srs(srs_to_test_against, srs_tif, errors) - assert len(errors) == 1 + file_check = FileCheck("test", srs_to_test_against) + file_check.check_srs(srs_tif) + + assert file_check.errors