From bef5d27622d8f57b894eeb76708aec86b53ca6b6 Mon Sep 17 00:00:00 2001 From: Megan Davidson Date: Tue, 19 Jul 2022 14:39:30 +1200 Subject: [PATCH 1/6] feat: change gdal command to COG --- scripts/gdal_helper.py | 10 +++++++--- scripts/standardising.py | 20 +++++++++++++++++++- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/scripts/gdal_helper.py b/scripts/gdal_helper.py index f6ae26f3c..9a0cc2321 100644 --- a/scripts/gdal_helper.py +++ b/scripts/gdal_helper.py @@ -1,6 +1,6 @@ import os import subprocess -from typing import List +from typing import List, Optional from aws_helper import get_bucket_name_from_path, get_credentials, is_s3 from linz_logger import get_log @@ -30,7 +30,7 @@ def command_to_string(command: List[str]) -> str: return " ".join(command) -def run_gdal(command: List[str], input_file: str = "", output_file: str = "") -> "subprocess.CompletedProcess[bytes]": +def run_gdal(command: List[str] = [], input_file: str = "", output_file: str = "", input_file_index: Optional[int] = None) -> "subprocess.CompletedProcess[bytes]": """Run the GDAL command. The permissions to access to the input file are applied to the gdal environment. Args: @@ -53,7 +53,11 @@ def run_gdal(command: List[str], input_file: str = "", output_file: str = "") -> gdal_env["AWS_ACCESS_KEY_ID"] = credentials.access_key gdal_env["AWS_SECRET_ACCESS_KEY"] = credentials.secret_key gdal_env["AWS_SESSION_TOKEN"] = credentials.token - command.append(get_vfs_path(input_file)) + input_file = get_vfs_path(input_file) + if input_file_index: + command.insert(input_file_index, input_file) + else: + command.append(input_file) if output_file: command.append(output_file) diff --git a/scripts/standardising.py b/scripts/standardising.py index 58c4edccc..041e7a68c 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -47,10 +47,28 @@ "2", "-b", "3", + "-of", + "COG", "-co", "compress=lzw", + "-co", + "num_threads=all_cpus", + "-co", + "predictor=2", + "-co", + "overview_compress=webp", + "-co", + "biggtiff=yes", + "-co", + "overview_resampling=lanczos", + "-co", + "blocksize=512", + "-co", + "overview_quality=90", + "-co", + "sparse_ok=true" ] - run_gdal(command, file, tmp_file_path) + run_gdal(command, input_file=file, output_file=tmp_file_path, input_file_index=17) # Upload the standardized file to destination dst_file_path = os.path.join(dst_path, standardized_file_name).strip("/") From 08fc7c676fed8a60cc4e6c1b28245a4c93265f89 Mon Sep 17 00:00:00 2001 From: Megan Davidson Date: Tue, 19 Jul 2022 14:58:15 +1200 Subject: [PATCH 2/6] fix: formatters --- scripts/gdal_helper.py | 4 +++- scripts/standardising.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/gdal_helper.py b/scripts/gdal_helper.py index 9a0cc2321..61f02b699 100644 --- a/scripts/gdal_helper.py +++ b/scripts/gdal_helper.py @@ -30,7 +30,9 @@ def command_to_string(command: List[str]) -> str: return " ".join(command) -def run_gdal(command: List[str] = [], input_file: str = "", output_file: str = "", input_file_index: Optional[int] = None) -> "subprocess.CompletedProcess[bytes]": +def run_gdal( + command: List[str], input_file: str = "", output_file: str = "", input_file_index: Optional[int] = None +) -> "subprocess.CompletedProcess[bytes]": """Run the GDAL command. The permissions to access to the input file are applied to the gdal environment. Args: diff --git a/scripts/standardising.py b/scripts/standardising.py index 041e7a68c..9bc8b361d 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -66,7 +66,7 @@ "-co", "overview_quality=90", "-co", - "sparse_ok=true" + "sparse_ok=true", ] run_gdal(command, input_file=file, output_file=tmp_file_path, input_file_index=17) From ab30e1782c2fa87e9e1bb3ad772a34adcd294a91 Mon Sep 17 00:00:00 2001 From: Megan Davidson Date: Wed, 20 Jul 2022 13:41:32 +1200 Subject: [PATCH 3/6] fix: index not required --- README.md | 2 ++ scripts/gdal_helper.py | 9 ++------- scripts/standardising.py | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 1d1506f0a..ed0fc173c 100644 --- a/README.md +++ b/README.md @@ -39,3 +39,5 @@ Python version is set to `3.8.10` as it is the current version used by `osgeo/gd ```bash docker run -v ${HOME}/.aws/credentials:/root/.aws/credentials:ro -e AWS_PROFILE='your-aws-profile' 'image-id' python create_polygons.py --uri 's3://path-to-the-tiff/image.tif' --destination 'destination-bucket' ``` + +docker run -v ${HOME}/.aws/credentials:/root/.aws/credentials:ro -e AWS_PROFILE=li-topo-imagery-prod topo-imagery:latest python standardising.py --source s3://linz-imagery-staging/test/sample/BX24_5000_0405.tif --destination s3://linz-imagery-staging/test/cog-test-out/ diff --git a/scripts/gdal_helper.py b/scripts/gdal_helper.py index 61f02b699..2922cb5c1 100644 --- a/scripts/gdal_helper.py +++ b/scripts/gdal_helper.py @@ -1,6 +1,6 @@ import os import subprocess -from typing import List, Optional +from typing import List from aws_helper import get_bucket_name_from_path, get_credentials, is_s3 from linz_logger import get_log @@ -30,9 +30,7 @@ def command_to_string(command: List[str]) -> str: return " ".join(command) -def run_gdal( - command: List[str], input_file: str = "", output_file: str = "", input_file_index: Optional[int] = None -) -> "subprocess.CompletedProcess[bytes]": +def run_gdal(command: List[str], input_file: str = "", output_file: str = "") -> "subprocess.CompletedProcess[bytes]": """Run the GDAL command. The permissions to access to the input file are applied to the gdal environment. Args: @@ -56,9 +54,6 @@ def run_gdal( gdal_env["AWS_SECRET_ACCESS_KEY"] = credentials.secret_key gdal_env["AWS_SESSION_TOKEN"] = credentials.token input_file = get_vfs_path(input_file) - if input_file_index: - command.insert(input_file_index, input_file) - else: command.append(input_file) if output_file: diff --git a/scripts/standardising.py b/scripts/standardising.py index 9bc8b361d..dc198232d 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -68,7 +68,7 @@ "-co", "sparse_ok=true", ] - run_gdal(command, input_file=file, output_file=tmp_file_path, input_file_index=17) + run_gdal(command, input_file=file, output_file=tmp_file_path) # Upload the standardized file to destination dst_file_path = os.path.join(dst_path, standardized_file_name).strip("/") From 1eefef1380cfe138ee41bf6b99080dd6239f843d Mon Sep 17 00:00:00 2001 From: Megan Davidson Date: Wed, 20 Jul 2022 13:47:37 +1200 Subject: [PATCH 4/6] fix: remove test code --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index ed0fc173c..1d1506f0a 100644 --- a/README.md +++ b/README.md @@ -39,5 +39,3 @@ Python version is set to `3.8.10` as it is the current version used by `osgeo/gd ```bash docker run -v ${HOME}/.aws/credentials:/root/.aws/credentials:ro -e AWS_PROFILE='your-aws-profile' 'image-id' python create_polygons.py --uri 's3://path-to-the-tiff/image.tif' --destination 'destination-bucket' ``` - -docker run -v ${HOME}/.aws/credentials:/root/.aws/credentials:ro -e AWS_PROFILE=li-topo-imagery-prod topo-imagery:latest python standardising.py --source s3://linz-imagery-staging/test/sample/BX24_5000_0405.tif --destination s3://linz-imagery-staging/test/cog-test-out/ From 082b8554c5f7e4d9669aa4eafa78de70b85f4fb6 Mon Sep 17 00:00:00 2001 From: Megan Davidson Date: Fri, 22 Jul 2022 14:30:31 +1200 Subject: [PATCH 5/6] fix: typo --- scripts/standardising.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/standardising.py b/scripts/standardising.py index dc198232d..2f33c3cc7 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -58,7 +58,7 @@ "-co", "overview_compress=webp", "-co", - "biggtiff=yes", + "bigtiff=yes", "-co", "overview_resampling=lanczos", "-co", From 23c90582ed624d9becf46448c741a200f743d36c Mon Sep 17 00:00:00 2001 From: Megan Davidson Date: Fri, 22 Jul 2022 14:40:34 +1200 Subject: [PATCH 6/6] fix: catch more errors using stderr --- scripts/gdal_helper.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/gdal_helper.py b/scripts/gdal_helper.py index 2922cb5c1..bad5eb99d 100644 --- a/scripts/gdal_helper.py +++ b/scripts/gdal_helper.py @@ -58,12 +58,16 @@ def run_gdal(command: List[str], input_file: str = "", output_file: str = "") -> if output_file: command.append(output_file) + try: get_log().debug("run_gdal", command=command_to_string(command)) - proc = subprocess.run(command, env=gdal_env, check=True, capture_output=True) + proc = subprocess.run(command, env=gdal_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) except subprocess.CalledProcessError as cpe: get_log().error("run_gdal_failed", command=command_to_string(command), error=str(cpe.stderr, "utf-8")) raise cpe - get_log().debug("run_gdal_translate_succeded", command=command_to_string(command)) + if proc.stderr: + get_log().error("run_gdal_error", command=command_to_string(command), error=proc.stderr.decode()) + raise Exception(proc.stderr.decode()) + get_log().debug("run_gdal_succeded", command=command_to_string(command)) return proc