Skip to content

Commit

Permalink
fix: log Non Visual QA error report in a JSON format (#55)
Browse files Browse the repository at this point in the history
* fix: log Non Visual QA error report in a JSON format

* wip

* wip

* wip

* fix: log non-visual-qa as JSON

* fix: raise an exception when gdal output is an error
  • Loading branch information
paulfouquet authored Jul 27, 2022
1 parent 90396a6 commit ed4d623
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 127 deletions.
12 changes: 9 additions & 3 deletions scripts/gdal_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
from linz_logger import get_log


class GDALExecutionException(Exception):
    """Raised when a GDAL command exits with a failure or writes to stderr."""


def get_vfs_path(path: str) -> str:
"""Make the path as a GDAL Virtual File Systems path.
Expand Down Expand Up @@ -68,10 +72,12 @@ def run_gdal(
proc = subprocess.run(command, env=gdal_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
except subprocess.CalledProcessError as cpe:
get_log().error("run_gdal_failed", command=command_to_string(command), error=str(cpe.stderr, "utf-8"))
raise cpe
raise GDALExecutionException(f"GDAL {str(cpe.stderr, 'utf-8')}") from cpe

if proc.stderr:
get_log().error("run_gdal_error", command=command_to_string(command), error=proc.stderr.decode())
raise Exception(proc.stderr.decode())
get_log().debug("run_gdal_succeded", command=command_to_string(command))
raise GDALExecutionException(proc.stderr.decode())

get_log().debug("run_gdal_succeded", command=command_to_string(command), stdout=proc.stdout.decode())

return proc
214 changes: 120 additions & 94 deletions scripts/non_visual_qa.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,127 @@
import argparse
import json
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from file_helper import is_tiff
from format_source import format_source
from gdal_helper import run_gdal
from gdal_helper import GDALExecutionException, run_gdal
from linz_logger import get_log


def check_no_data(gdalinfo: Dict[str, Any], errors_list: List[str]) -> None:
    """Add an error in errors_list if the first band has no "noDataValue" or it is not equal to 255.

    Only the first entry of "bands" is inspected. A missing or empty "bands" list is
    reported as "noDataValue not set" rather than raising.

    Args:
        gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary.
        errors_list (List[str]): List of errors as strings; appended to in place.
    """
    bands = gdalinfo.get("bands") or []
    if not bands or "noDataValue" not in bands[0]:
        errors_list.append("noDataValue not set")
        return

    current_nodata_val = bands[0]["noDataValue"]
    if current_nodata_val == 255:
        return

    try:
        reported: Any = int(current_nodata_val)
    except (TypeError, ValueError, OverflowError):
        # NaN, infinities and non-numeric values cannot be converted to int;
        # report them verbatim instead of crashing the whole QA run.
        reported = current_nodata_val
    errors_list.append(f"noDataValue is {reported} not 255")


def check_band_count(gdalinfo: Dict[str, Any], errors_list: List[str], expected_count: int = 3) -> None:
    """Add an error in errors_list if the number of bands is not exactly expected_count.

    Args:
        gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary.
        errors_list (List[str]): List of errors as strings; appended to in place.
        expected_count (int): Number of bands required. Defaults to 3.
    """
    bands_num = len(gdalinfo["bands"])
    if bands_num != expected_count:
        errors_list.append(f"not {expected_count} bands, {bands_num} bands found")


def check_srs(gdalsrsinfo: bytes, gdalsrsinfo_tif: bytes, errors_list: List[str]) -> None:
    """Add an error in errors_list if gdalsrsinfo and gdalsrsinfo_tif values are different.

    Both values are compared as text with surrounding whitespace removed, so a
    bytes value and the equivalent str value are treated as the same SRS.

    Args:
        gdalsrsinfo (bytes): Reference value returned by gdalsrsinfo (str is also accepted).
        gdalsrsinfo_tif (bytes): Value returned by gdalsrsinfo for the tif (str is also accepted).
        errors_list (List[str]): List of errors as strings; appended to in place.
    """

    def _as_text(value: Any) -> str:
        # bytes never compare equal to str in Python 3, so normalise to text first.
        if isinstance(value, (bytes, bytearray)):
            value = bytes(value).decode("utf-8", errors="replace")
        return str(value).strip()

    if _as_text(gdalsrsinfo_tif) != _as_text(gdalsrsinfo):
        errors_list.append("different srs")


# Colour interpretation required for bands 1, 2 and 3, in that order.
_EXPECTED_BAND_COLOURS = ("Red", "Green", "Blue")


def _unexpected_colour_bands(gdalinfo: Dict[str, Any]) -> List[str]:
    """Return a "band <n> <colour>" entry for every band that is not the expected RGB band, in band order."""
    unexpected = []
    for index, band in enumerate(gdalinfo["bands"], start=1):
        colour = band["colorInterpretation"]
        # Any band past the third is unexpected, whatever its colour interpretation.
        if index > len(_EXPECTED_BAND_COLOURS) or colour != _EXPECTED_BAND_COLOURS[index - 1]:
            unexpected.append(f"band {index} {colour}")
    return unexpected


def check_color_interpretation(gdalinfo: Dict[str, Any], errors_list: List[str]) -> None:
    """Add an error in errors_list if the bands are not exactly Red, Green, Blue in that order.

    Offending bands are listed in band order.

    Args:
        gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary.
        errors_list (List[str]): List of errors as strings; appended to in place.
    """
    unexpected = _unexpected_colour_bands(gdalinfo)
    if unexpected:
        errors_list.append(f"unexpected color interpretation bands; {', '.join(unexpected)}")


class FileCheck:
    """Run the non-visual QA checks on one file and collect the errors as dictionaries.

    Each error is a dictionary with a "type" and a "message" key plus optional
    custom fields, stored in `errors` in the order the checks reported them.
    """

    def __init__(self, path: str, srs: bytes) -> None:
        """Create a check for one file.

        Args:
            path (str): Path of the file passed to the GDAL commands.
            srs (bytes): Reference SRS the file's own SRS is compared against.
        """
        self.path = path
        self.global_srs = srs
        self.errors: List[Dict[str, Any]] = []
        self._valid = True

    @staticmethod
    def _as_text(value: Any) -> str:
        """Return value as stripped text, decoding bytes as UTF-8."""
        if isinstance(value, (bytes, bytearray)):
            value = bytes(value).decode("utf-8", errors="replace")
        return str(value).strip()

    def add_error(self, error_type: str, error_message: str, custom_fields: Optional[Dict[str, str]] = None) -> None:
        """Record an error and mark the file as invalid.

        Args:
            error_type (str): Category of the error, stored under "type".
            error_message (str): Description of the error, stored under "message".
            custom_fields (Optional[Dict[str, str]]): Extra key/value pairs merged into the error.
        """
        entry: Dict[str, Any] = {"type": error_type, "message": error_message}
        if custom_fields:
            entry.update(custom_fields)
        self.errors.append(entry)
        self._valid = False

    def is_valid(self) -> bool:
        """Return True while no error has been recorded."""
        return self._valid

    def check_no_data(self, gdalinfo: Dict[str, Any]) -> None:
        """Add an error if the first band has no "noDataValue" or it is not equal to 255.

        A missing or empty "bands" list is reported as "noDataValue not set".

        Args:
            gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary.
        """
        bands = gdalinfo.get("bands") or []
        if not bands or "noDataValue" not in bands[0]:
            self.add_error(error_type="nodata", error_message="noDataValue not set")
            return

        current_nodata_val = bands[0]["noDataValue"]
        if current_nodata_val == 255:
            return

        try:
            current = f"{int(current_nodata_val)}"
        except (TypeError, ValueError, OverflowError):
            # NaN, infinities and non-numeric values cannot be converted to int;
            # report them verbatim instead of aborting the run.
            current = f"{current_nodata_val}"
        self.add_error(
            error_type="nodata",
            error_message="noDataValue is not 255",
            custom_fields={"current": current},
        )

    def check_band_count(self, gdalinfo: Dict[str, Any]) -> None:
        """Add an error if there are not exactly 3 bands.

        Args:
            gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary.
        """
        bands_num = len(gdalinfo["bands"])
        if bands_num != 3:
            self.add_error(
                error_type="bands", error_message="bands count is not 3", custom_fields={"count": f"{bands_num}"}
            )

    def check_srs(self, gdalsrsinfo_tif: bytes) -> None:
        """Add an error if the file's SRS differs from the reference SRS given at construction.

        Both values are compared as text with surrounding whitespace removed.

        Args:
            gdalsrsinfo_tif (bytes): Value returned by gdalsrsinfo for the tif.
        """
        if self._as_text(gdalsrsinfo_tif) != self._as_text(self.global_srs):
            self.add_error(error_type="srs", error_message="different srs")

    def check_color_interpretation(self, gdalinfo: Dict[str, Any]) -> None:
        """Add an error if the bands are not exactly Red, Green, Blue in that order.

        Offending bands are listed in band order under the "missing" field.

        Args:
            gdalinfo (Dict[str, Any]): JSON returned by gdalinfo as a Python Dictionary.
        """
        unexpected = _unexpected_colour_bands(gdalinfo)
        if unexpected:
            self.add_error(
                error_type="color",
                error_message="unexpected color interpretation bands",
                custom_fields={"missing": ", ".join(unexpected)},
            )

    def run(self) -> None:
        """Run gdalinfo and gdalsrsinfo on the file and record every issue found.

        The band and SRS checks are skipped when gdalinfo fails or its output
        cannot be parsed as JSON; that failure is recorded as a "gdalinfo" error.
        """
        try:
            gdalinfo_process = run_gdal(["gdalinfo", "-stats", "-json"], self.path)
        except GDALExecutionException as gee:
            self.add_error(error_type="gdalinfo", error_message=f"failed: {str(gee)}")
            return

        gdalinfo_result: Dict[str, Any] = {}
        parsed = True
        try:
            gdalinfo_result = json.loads(gdalinfo_process.stdout)
        except json.JSONDecodeError as e:
            get_log().error("load_gdalinfo_result_error", file=self.path, error=e)
            self.add_error(error_type="gdalinfo", error_message=f"parsing result issue: {str(e)}")
            parsed = False

        if gdalinfo_process.stderr:
            # Decode so the report carries the message itself, not a b'...' repr.
            self.add_error(
                error_type="gdalinfo", error_message=f"error(s): {self._as_text(gdalinfo_process.stderr)}"
            )

        if not parsed:
            return

        self.check_no_data(gdalinfo_result)
        self.check_band_count(gdalinfo_result)
        self.check_color_interpretation(gdalinfo_result)

        try:
            gdalsrsinfo_tif_result = run_gdal(["gdalsrsinfo", "-o", "wkt"], self.path)
            self.check_srs(gdalsrsinfo_tif_result.stdout)
        except GDALExecutionException as gee:
            self.add_error(error_type="srs", error_message=f"not checked: {str(gee)}")


def main() -> None: # pylint: disable=too-many-locals
parser = argparse.ArgumentParser()
parser.add_argument("--source", dest="source", nargs="+", required=True)
arguments = parser.parse_args()
Expand All @@ -88,41 +142,13 @@ def main() -> None:
if not is_tiff(file):
get_log().trace("non_visual_qa_file_not_tiff_skipped", file=file)
continue
file_check = FileCheck(file, srs)
file_check.run()

gdalinfo_command = ["gdalinfo", "-stats", "-json"]
gdalinfo_process = run_gdal(gdalinfo_command, file)
gdalinfo_result = {}
try:
gdalinfo_result = json.loads(gdalinfo_process.stdout)
except json.JSONDecodeError as e:
get_log().error("load_gdalinfo_result_error", file=file, error=e)
continue

gdalinfo_errors = gdalinfo_process.stderr

# Check result
errors: List[str] = []
# No data
check_no_data(gdalinfo_result, errors)

# Band count
check_band_count(gdalinfo_result, errors)

# srs
gdalsrsinfo_tif_command = ["gdalsrsinfo", "-o", "wkt"]
gdalsrsinfo_tif_result = run_gdal(gdalsrsinfo_tif_command, file)
check_srs(srs, gdalsrsinfo_tif_result.stdout, errors)

# Color interpretation
check_color_interpretation(gdalinfo_result, errors)

# gdal errors
errors.append(f"{gdalinfo_errors!r}")

if len(errors) > 0:
get_log().info("non_visual_qa_errors_found", file=file, result=errors)
if not file_check.is_valid():
get_log().info("non_visual_qa_errors", file=file_check.path, errors=file_check.errors)
else:
get_log().info("non_visual_qa_no_error", file=file)
get_log().info("non_visual_qa_passed", file=file_check.path)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit ed4d623

Please sign in to comment.