From e4edf749909bc9ea490dfa33aa5aee8fd96e275e Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Thu, 25 Aug 2022 10:43:50 +1200 Subject: [PATCH 1/2] feat: switch between two gdal presets "lzw" or "webp" Both are lossless compressions at the top level imagery --- scripts/gdal/gdal_preset.py | 76 +++++++++++++++++++++++++++++++++ scripts/standardise_validate.py | 2 +- scripts/standardising.py | 62 ++++++++------------------- 3 files changed, 94 insertions(+), 46 deletions(-) create mode 100644 scripts/gdal/gdal_preset.py diff --git a/scripts/gdal/gdal_preset.py b/scripts/gdal/gdal_preset.py new file mode 100644 index 000000000..13027df47 --- /dev/null +++ b/scripts/gdal/gdal_preset.py @@ -0,0 +1,76 @@ +from typing import List + +GDAL_PRESET_LZW = [ + "gdal_translate", + "-q", + "-scale", + "0", + "255", + "0", + "254", + "-a_srs", + "EPSG:2193", + "-a_nodata", + "255", + "-b", + "1", + "-b", + "2", + "-b", + "3", + "-of", + "COG", + "-co", + "compress=lzw", + "-co", + "num_threads=all_cpus", + "-co", + "predictor=2", + "-co", + "overview_compress=webp", + "-co", + "bigtiff=yes", + "-co", + "overview_resampling=lanczos", + "-co", + "blocksize=512", + "-co", + "overview_quality=90", + "-co", + "sparse_ok=true", +] + +GDAL_PRESET_WEBP = [ + "gdal_translate", + "-a_srs", + "EPSG:2193", + "-of", + "COG", + "-co", + "compress=webp", + "-co", + "num_threads=all_cpus", + "-co", + "quality=100", + "-co", + "overview_compress=webp", + "-co", + "bigtiff=yes", + "-co", + "overview_resampling=lanczos", + "-co", + "blocksize=512", + "-co", + "overview_quality=90", + "-co", + "sparse_ok=true", +] + + +def get_gdal_command(preset: str) -> List[str]: + print(preset) + if preset == "lzw": + return GDAL_PRESET_LZW + if preset == "webp": + return GDAL_PRESET_WEBP + raise Exception(f"Unknown GDAL preset: {preset}") diff --git a/scripts/standardise_validate.py b/scripts/standardise_validate.py index 94b299fb2..2027d47eb 100644 --- a/scripts/standardise_validate.py +++ b/scripts/standardise_validate.py @@ -10,7 +10,7 @@ def main() -> None: source = parse_source() if is_argo(): concurrency = 4 - standardised_files = start_standardising(source, concurrency) + standardised_files = start_standardising(source, "lzw", concurrency) if standardised_files: non_visual_qa(standardised_files) else: diff --git a/scripts/standardising.py b/scripts/standardising.py index 1e95b8ecc..638316254 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -1,17 +1,20 @@ +import argparse import os +from functools import partial from multiprocessing import Pool from typing import List from linz_logger import get_log from scripts.aws.aws_helper import parse_path -from scripts.cli.cli_helper import is_argo, parse_source +from scripts.cli.cli_helper import format_source, is_argo from scripts.files.files_helper import get_file_name_from_path, is_tiff from scripts.gdal.gdal_helper import run_gdal +from scripts.gdal.gdal_preset import get_gdal_command from scripts.logging.time_helper import time_in_ms -def start_standardising(files: List[str], concurrency: int) -> List[str]: +def start_standardising(files: List[str], preset: str, concurrency: int) -> List[str]: start_time = time_in_ms() tiff_files = [] output_files = [] @@ -25,7 +28,7 @@ def start_standardising(files: List[str], concurrency: int) -> List[str]: get_log().info("standardising_file_not_tiff_skipped", file=file) with Pool(concurrency) as p: - output_files = p.map(standardising, tiff_files) + output_files = p.map(partial(standardising, preset=preset), tiff_files) p.close() p.join() @@ -34,7 +37,7 @@ def start_standardising(files: List[str], concurrency: int) -> List[str]: return output_files -def standardising(file: str) -> str: +def standardising(file: str, preset: str) -> str: output_folder = "/tmp/" get_log().info("standardising_start", source=file) @@ -43,45 +46,7 @@ def standardising(file: str) -> str: standardized_file_name = f"{get_file_name_from_path(src_file_path)}.tiff" tmp_file_path = os.path.join(output_folder, standardized_file_name) - command = [ - "gdal_translate", - "-q", - "-scale", - "0", - "255", - "0", - "254", - "-a_srs", - "EPSG:2193", - "-a_nodata", - "255", - "-b", - "1", - "-b", - "2", - "-b", - "3", - "-of", - "COG", - "-co", - "compress=lzw", - "-co", - "num_threads=all_cpus", - "-co", - "predictor=2", - "-co", - "overview_compress=webp", - "-co", - "bigtiff=yes", - "-co", - "overview_resampling=lanczos", - "-co", - "blocksize=512", - "-co", - "overview_quality=90", - "-co", - "sparse_ok=true", - ] + command = get_gdal_command(preset) run_gdal(command, input_file=file, output_file=tmp_file_path) return tmp_file_path @@ -89,10 +54,17 @@ def standardising(file: str) -> str: def main() -> None: concurrency: int = 1 - source = parse_source() + parser = argparse.ArgumentParser() + parser.add_argument("--preset", dest="preset", required=False, default="lzw") + parser.add_argument("--source", dest="source", nargs="+", required=True) + arguments = parser.parse_args() + + source = format_source(arguments.source) + if is_argo(): concurrency = 4 - start_standardising(source, concurrency) + + start_standardising(source, arguments.preset, concurrency) if __name__ == "__main__": From 6f57c3f850e4280709b6a6fe5eed9effd373037d Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Thu, 25 Aug 2022 10:51:14 +1200 Subject: [PATCH 2/2] refactor: log preset used --- scripts/gdal/gdal_preset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/gdal/gdal_preset.py b/scripts/gdal/gdal_preset.py index 13027df47..633a4c353 100644 --- a/scripts/gdal/gdal_preset.py +++ b/scripts/gdal/gdal_preset.py @@ -1,5 +1,7 @@ from typing import List +from linz_logger import get_log + GDAL_PRESET_LZW = [ "gdal_translate", "-q", @@ -68,7 +70,7 @@ def get_gdal_command(preset: str) -> List[str]: - print(preset) + get_log().info("gdal_preset", preset=preset) if preset == "lzw": return GDAL_PRESET_LZW if preset == "webp":