From 910655b6d6d1ffae08342117cada9e9ff45655ce Mon Sep 17 00:00:00 2001
From: Paul Fouquet
Date: Mon, 15 Aug 2022 15:30:27 +1200
Subject: [PATCH 1/4] feat: run standardising followed by non_visual_qa

---
 scripts/non_visual_qa.py        |  14 ++--
 scripts/standardise_validate.py |  19 ++++++
 scripts/standardising.py        | 116 ++++++++++++++++++--------------
 3 files changed, 92 insertions(+), 57 deletions(-)
 create mode 100644 scripts/standardise_validate.py

diff --git a/scripts/non_visual_qa.py b/scripts/non_visual_qa.py
index cc1833628..a77621027 100644
--- a/scripts/non_visual_qa.py
+++ b/scripts/non_visual_qa.py
@@ -122,11 +122,10 @@ def run(self) -> None:
             self.add_error(error_type="srs", error_message=f"not checked: {str(gee)}")
 
 
-def main() -> None:
+def non_visual_qa(files: List[str]) -> None:
     start_time = time_in_ms()
-    source = parse_source()
 
-    get_log().info("non_visual_qa_start", source=source)
+    get_log().info("non_visual_qa_start", source=files)
 
     # Get srs
     gdalsrsinfo_command = ["gdalsrsinfo", "-o", "wkt", "EPSG:2193"]
@@ -137,7 +136,7 @@
     )
     srs = gdalsrsinfo_result.stdout
 
-    for file in source:
+    for file in files:
         if not is_tiff(file):
             get_log().trace("non_visual_qa_file_not_tiff_skipped", file=file)
             continue
@@ -149,7 +148,12 @@
         else:
             get_log().info("non_visual_qa_passed", file=file_check.path)
 
-    get_log().info("non_visual_qa_end", source=source, duration=time_in_ms() - start_time)
+    get_log().info("non_visual_qa_end", source=files, duration=time_in_ms() - start_time)
+
+
+def main() -> None:
+    source = parse_source()
+    non_visual_qa(source)
 
 
 if __name__ == "__main__":
diff --git a/scripts/standardise_validate.py b/scripts/standardise_validate.py
new file mode 100644
index 000000000..79457f3f2
--- /dev/null
+++ b/scripts/standardise_validate.py
@@ -0,0 +1,19 @@
+from typing import List
+
+from scripts.cli.cli_helper import parse_source
+from scripts.non_visual_qa import non_visual_qa
+from scripts.standardising import standardising
+
+
+def standardise_validate(files: List[str]) -> None:
+    standardising(files)
+
+
+def main() -> None:
+    source = parse_source()
+    standardised_files = standardising(source)
+    non_visual_qa(standardised_files)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/standardising.py b/scripts/standardising.py
index b8deee37a..612efaab1 100644
--- a/scripts/standardising.py
+++ b/scripts/standardising.py
@@ -1,4 +1,5 @@
 import os
+from typing import List
 
 from linz_logger import get_log
 
@@ -8,62 +9,73 @@
 from scripts.gdal.gdal_helper import run_gdal
 from scripts.logging.time_helper import time_in_ms
 
-start_time = time_in_ms()
+def standardising(files: List[str]) -> str:
+    start_time = time_in_ms()
+    output_folder = "/tmp/"
+    output_files = []
 
-source = parse_source()
+    get_log().info("standardising_start", source=files)
 
-get_log().info("standardising_start", source=source)
+    for file in files:
+        if not is_tiff(file):
+            get_log().info("standardising_file_not_tiff_skipped", file=file)
+            continue
 
-gdal_env = os.environ.copy()
+        _, src_file_path = parse_path(file)
+        standardized_file_name = f"standardized_{get_file_name_from_path(src_file_path)}"
+        tmp_file_path = os.path.join(output_folder, standardized_file_name)
+        command = [
+            "gdal_translate",
+            "-q",
+            "-scale",
+            "0",
+            "255",
+            "0",
+            "254",
+            "-a_srs",
+            "EPSG:2193",
+            "-a_nodata",
+            "255",
+            "-b",
+            "1",
+            "-b",
+            "2",
+            "-b",
+            "3",
+            "-of",
+            "COG",
+            "-co",
+            "compress=lzw",
+            "-co",
+            "num_threads=all_cpus",
+            "-co",
+            "predictor=2",
+            "-co",
"overview_compress=webp", + "-co", + "bigtiff=yes", + "-co", + "overview_resampling=lanczos", + "-co", + "blocksize=512", + "-co", + "overview_quality=90", + "-co", + "sparse_ok=true", + ] + run_gdal(command, input_file=file, output_file=tmp_file_path) + output_files.append(tmp_file_path) -for file in source: - if not is_tiff(file): - get_log().trace("standardising_file_not_tiff_skipped", file=file) - continue + get_log().info("standardising_end", source=files, duration=time_in_ms() - start_time) - s3_path = parse_path(file) - standardized_file_name = f"standardized_{get_file_name_from_path(s3_path.key)}" - tmp_file_path = os.path.join("/tmp/", standardized_file_name) - command = [ - "gdal_translate", - "-q", - "-scale", - "0", - "255", - "0", - "254", - "-a_srs", - "EPSG:2193", - "-a_nodata", - "255", - "-b", - "1", - "-b", - "2", - "-b", - "3", - "-of", - "COG", - "-co", - "compress=lzw", - "-co", - "num_threads=all_cpus", - "-co", - "predictor=2", - "-co", - "overview_compress=webp", - "-co", - "bigtiff=yes", - "-co", - "overview_resampling=lanczos", - "-co", - "blocksize=512", - "-co", - "overview_quality=90", - "-co", - "sparse_ok=true", - ] - run_gdal(command, input_file=file, output_file=tmp_file_path) + return output_files - get_log().info("standardising_end", source=source, duration=time_in_ms() - start_time) + +def main() -> None: + source = parse_source() + standardising(source) + + +if __name__ == "__main__": + main() From dbfee1c6e4d0584c75032729b62c4ecd8dd15d2d Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Mon, 15 Aug 2022 15:32:34 +1200 Subject: [PATCH 2/4] fix: typing --- scripts/standardising.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/standardising.py b/scripts/standardising.py index 612efaab1..f0d684b95 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -10,7 +10,7 @@ from scripts.logging.time_helper import time_in_ms -def standardising(files: List[str]) -> str: +def standardising(files: List[str]) -> List[str]: start_time = time_in_ms() output_folder = "/tmp/" output_files = [] From 6728916e3fe9f622f4bcbc227cd82f71e32388ed Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Thu, 18 Aug 2022 10:07:50 +1200 Subject: [PATCH 3/4] chore: remove unused function --- scripts/standardise_validate.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/standardise_validate.py b/scripts/standardise_validate.py index 79457f3f2..3cbba3bee 100644 --- a/scripts/standardise_validate.py +++ b/scripts/standardise_validate.py @@ -1,14 +1,8 @@ -from typing import List - from scripts.cli.cli_helper import parse_source from scripts.non_visual_qa import non_visual_qa from scripts.standardising import standardising -def standardise_validate(files: List[str]) -> None: - standardising(files) - - def main() -> None: source = parse_source() standardised_files = standardising(source) From fb631294f7dc989f18c7a0b21d83261cd241638f Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Thu, 18 Aug 2022 10:19:57 +1200 Subject: [PATCH 4/4] chore: remove unused and duplicated code --- scripts/converters/__init__.py | 0 scripts/converters/format_source.py | 32 --------------- scripts/converters/tests/__init__.py | 0 .../converters/tests/format_source_test.py | 41 ------------------- 4 files changed, 73 deletions(-) delete mode 100644 scripts/converters/__init__.py delete mode 100644 scripts/converters/format_source.py delete mode 100644 scripts/converters/tests/__init__.py delete mode 100644 scripts/converters/tests/format_source_test.py diff 
diff --git a/scripts/converters/__init__.py b/scripts/converters/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/scripts/converters/format_source.py b/scripts/converters/format_source.py
deleted file mode 100644
index e49a08658..000000000
--- a/scripts/converters/format_source.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import argparse
-import json
-from typing import List
-
-from linz_logger import get_log
-
-
-def format_source(source: List[str]) -> List[str]:
-    """Due to Argo constraints if using the basemaps cli list command
-    the source has a string that contains a list that needs to be split.
-    example: ["[\"s3://test/image_one.tiff\", \"s3://test/image_two.tiff\"]"]
-    """
-    if len(source) == 1 and source[0].startswith("["):
-        try:
-            source_json: List[str] = json.loads(source[0])
-            return source_json
-        except json.JSONDecodeError as e:
-            get_log().debug("Decoding Json Failed", source=source, msg=e)
-    return source
-
-
-def parse_source() -> List[str]:
-    """Parse the CLI argument '--source' and format it to a list of paths.
-
-    Returns:
-        List[str]: A list of paths.
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--source", dest="source", nargs="+", required=True)
-    arguments = parser.parse_args()
-
-    return format_source(arguments.source)
diff --git a/scripts/converters/tests/__init__.py b/scripts/converters/tests/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/scripts/converters/tests/format_source_test.py b/scripts/converters/tests/format_source_test.py
deleted file mode 100644
index 383aed33d..000000000
--- a/scripts/converters/tests/format_source_test.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from scripts.cli.cli_helper import format_source
-
-
-def test_format_source_from_basemaps_cli_file() -> None:
-    """Based on output from basemaps cli
-    example: --source "[\"s3://test/image_one.tiff\", \"s3://test/image_two.tiff\"]"
-    """
-    # fmt: off
-    source = ["[\"s3://test/image_one.tiff\", \"s3://test/image_two.tiff\"]"]
-    # fmt: on
-    file_list = format_source(source)
-    assert isinstance(file_list, list)
-    assert len(file_list) == 2
-    assert file_list == ["s3://test/image_one.tiff", "s3://test/image_two.tiff"]
-
-
-def test_format_source_single_input() -> None:
-    """example: --source s3://test/image_one.tiff"""
-    source = ["s3://test/image_one.tiff"]
-    file_list = format_source(source)
-    assert isinstance(file_list, list)
-    assert len(file_list) == 1
-    assert file_list == ["s3://test/image_one.tiff"]
-
-
-def test_format_source_multiple_inputs() -> None:
-    """example: --source s3://test/image_one.tiff s3://test/image_two.tiff"""
-    source = ["s3://test/image_one.tiff", "s3://test/image_two.tiff"]
-    file_list = format_source(source)
-    assert isinstance(file_list, list)
-    assert len(file_list) == 2
-    assert file_list == ["s3://test/image_one.tiff", "s3://test/image_two.tiff"]
-
-
-def test_format_source_json_loading_error() -> None:
-    """example: --source [s3://test/image_one.tiff"""
-    source = ["[s3://test/image_one.tiff"]
-    file_list = format_source(source)
-    assert isinstance(file_list, list)
-    assert len(file_list) == 1
-    assert file_list == ["[s3://test/image_one.tiff"]
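
Taken together, the series makes standardising() return the paths of the COGs it writes under /tmp/ so that the new scripts/standardise_validate.py entry point can hand them straight to non_visual_qa(). A minimal sketch of that chain, calling the two functions directly (the source paths below are hypothetical; in the real workflow the list comes from the --source argument parsed by parse_source()):

    from scripts.non_visual_qa import non_visual_qa
    from scripts.standardising import standardising

    # Hypothetical input TIFFs; the pipeline gets these from parse_source().
    source = ["s3://test/image_one.tiff", "s3://test/image_two.tiff"]

    # Standardise each TIFF into a COG under /tmp/ and collect the local output paths.
    standardised_files = standardising(source)

    # Run the non-visual QA checks against the standardised outputs rather than the originals.
    non_visual_qa(standardised_files)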
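
On the removed scripts/converters package: it duplicated format_source()/parse_source() in scripts/cli/cli_helper.py, which is the module the deleted tests already import from. The behaviour that matters under Argo is unwrapping a single JSON-encoded list argument produced by the basemaps CLI list command; roughly, assuming cli_helper keeps the signature shown in the removed copy:

    from scripts.cli.cli_helper import format_source

    # One argument that is itself a JSON list (basemaps CLI under Argo): it gets split.
    format_source(['["s3://test/image_one.tiff", "s3://test/image_two.tiff"]'])
    # -> ["s3://test/image_one.tiff", "s3://test/image_two.tiff"]

    # Plain arguments, or strings that fail to parse as JSON, pass through unchanged.
    format_source(["[s3://test/image_one.tiff"])
    # -> ["[s3://test/image_one.tiff"]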