diff --git a/scripts/converters/__init__.py b/scripts/converters/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/scripts/converters/format_source.py b/scripts/converters/format_source.py
deleted file mode 100644
index e49a08658..000000000
--- a/scripts/converters/format_source.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import argparse
-import json
-from typing import List
-
-from linz_logger import get_log
-
-
-def format_source(source: List[str]) -> List[str]:
-    """Due to Argo constraints if using the basemaps cli list command
-    the source has a string that contains a list that needs to be split.
-    example: ["[\"s3://test/image_one.tiff\", \"s3://test/image_two.tiff\"]"]
-    """
-    if len(source) == 1 and source[0].startswith("["):
-        try:
-            source_json: List[str] = json.loads(source[0])
-            return source_json
-        except json.JSONDecodeError as e:
-            get_log().debug("Decoding Json Failed", source=source, msg=e)
-    return source
-
-
-def parse_source() -> List[str]:
-    """Parse the CLI argument '--source' and format it to a list of paths.
-
-    Returns:
-        List[str]: A list of paths.
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--source", dest="source", nargs="+", required=True)
-    arguments = parser.parse_args()
-
-    return format_source(arguments.source)
diff --git a/scripts/converters/tests/__init__.py b/scripts/converters/tests/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/scripts/converters/tests/format_source_test.py b/scripts/converters/tests/format_source_test.py
deleted file mode 100644
index 383aed33d..000000000
--- a/scripts/converters/tests/format_source_test.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from scripts.cli.cli_helper import format_source
-
-
-def test_format_source_from_basemaps_cli_file() -> None:
-    """Based on output from basemaps cli
-    example: --source "[\"s3://test/image_one.tiff\", \"s3://test/image_two.tiff\"]"
-    """
-    # fmt: off
-    source = ["[\"s3://test/image_one.tiff\", \"s3://test/image_two.tiff\"]"]
-    # fmt: on
-    file_list = format_source(source)
-    assert isinstance(file_list, list)
-    assert len(file_list) == 2
-    assert file_list == ["s3://test/image_one.tiff", "s3://test/image_two.tiff"]
-
-
-def test_format_source_single_input() -> None:
-    """example: --source s3://test/image_one.tiff"""
-    source = ["s3://test/image_one.tiff"]
-    file_list = format_source(source)
-    assert isinstance(file_list, list)
-    assert len(file_list) == 1
-    assert file_list == ["s3://test/image_one.tiff"]
-
-
-def test_format_source_multiple_inputs() -> None:
-    """example: --source s3://test/image_one.tiff s3://test/image_two.tiff"""
-    source = ["s3://test/image_one.tiff", "s3://test/image_two.tiff"]
-    file_list = format_source(source)
-    assert isinstance(file_list, list)
-    assert len(file_list) == 2
-    assert file_list == ["s3://test/image_one.tiff", "s3://test/image_two.tiff"]
-
-
-def test_format_source_json_loading_error() -> None:
-    """example: --source [s3://test/image_one.tiff"""
-    source = ["[s3://test/image_one.tiff"]
-    file_list = format_source(source)
-    assert isinstance(file_list, list)
-    assert len(file_list) == 1
-    assert file_list == ["[s3://test/image_one.tiff"]
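
Note: the converters package is deleted outright, but its behaviour survives elsewhere;
the deleted tests already import format_source from scripts.cli.cli_helper, its new home.
A minimal sketch of the contract those tests pin down (the module path comes from the
test import above; the behaviour mirrors the deleted format_source.py):

    from scripts.cli.cli_helper import format_source

    # Argo passes the basemaps cli output as a single JSON-encoded string,
    # which format_source() splits back into a list of paths...
    argo_style = ['["s3://test/image_one.tiff", "s3://test/image_two.tiff"]']
    assert format_source(argo_style) == ["s3://test/image_one.tiff", "s3://test/image_two.tiff"]

    # ...while plain arguments, including malformed JSON, pass through unchanged.
    assert format_source(["s3://test/image_one.tiff"]) == ["s3://test/image_one.tiff"]
    assert format_source(["[s3://test/image_one.tiff"]) == ["[s3://test/image_one.tiff"]
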
diff --git a/scripts/non_visual_qa.py b/scripts/non_visual_qa.py
index cc1833628..a77621027 100644
--- a/scripts/non_visual_qa.py
+++ b/scripts/non_visual_qa.py
@@ -122,11 +122,10 @@ def run(self) -> None:
             self.add_error(error_type="srs", error_message=f"not checked: {str(gee)}")
 
 
-def main() -> None:
+def non_visual_qa(files: List[str]) -> None:
     start_time = time_in_ms()
-    source = parse_source()
 
-    get_log().info("non_visual_qa_start", source=source)
+    get_log().info("non_visual_qa_start", source=files)
 
     # Get srs
     gdalsrsinfo_command = ["gdalsrsinfo", "-o", "wkt", "EPSG:2193"]
@@ -137,7 +136,7 @@ def main() -> None:
     )
     srs = gdalsrsinfo_result.stdout
 
-    for file in source:
+    for file in files:
         if not is_tiff(file):
             get_log().trace("non_visual_qa_file_not_tiff_skipped", file=file)
             continue
@@ -149,7 +148,12 @@ def main() -> None:
     else:
         get_log().info("non_visual_qa_passed", file=file_check.path)
 
-    get_log().info("non_visual_qa_end", source=source, duration=time_in_ms() - start_time)
+    get_log().info("non_visual_qa_end", source=files, duration=time_in_ms() - start_time)
+
+
+def main() -> None:
+    source = parse_source()
+    non_visual_qa(source)
 
 
 if __name__ == "__main__":
diff --git a/scripts/standardise_validate.py b/scripts/standardise_validate.py
new file mode 100644
index 000000000..3cbba3bee
--- /dev/null
+++ b/scripts/standardise_validate.py
@@ -0,0 +1,13 @@
+from scripts.cli.cli_helper import parse_source
+from scripts.non_visual_qa import non_visual_qa
+from scripts.standardising import standardising
+
+
+def main() -> None:
+    source = parse_source()
+    standardised_files = standardising(source)
+    non_visual_qa(standardised_files)
+
+
+if __name__ == "__main__":
+    main()
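
The new standardise_validate.py entry point chains standardisation and the non-visual QA
checks so a single task can run both. One plausible invocation, assuming it is run as a
module from the repository root (the file names are placeholders; parse_source accepts
one or more --source paths):

    python -m scripts.standardise_validate --source s3://test/image_one.tiff s3://test/image_two.tiff
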
"num_threads=all_cpus", - "-co", - "predictor=2", - "-co", - "overview_compress=webp", - "-co", - "bigtiff=yes", - "-co", - "overview_resampling=lanczos", - "-co", - "blocksize=512", - "-co", - "overview_quality=90", - "-co", - "sparse_ok=true", - ] - run_gdal(command, input_file=file, output_file=tmp_file_path) + return output_files - get_log().info("standardising_end", source=source, duration=time_in_ms() - start_time) + +def main() -> None: + source = parse_source() + standardising(source) + + +if __name__ == "__main__": + main()