From 4f95865c93d014f9ecf70e575f1eefeb16d04ee6 Mon Sep 17 00:00:00 2001 From: Manuel Goacolou Date: Wed, 31 Jul 2024 16:29:19 +0200 Subject: [PATCH 1/3] use persistent cache of dem --- dem_stitcher/stitcher.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/dem_stitcher/stitcher.py b/dem_stitcher/stitcher.py index 022bf24..b32647a 100644 --- a/dem_stitcher/stitcher.py +++ b/dem_stitcher/stitcher.py @@ -81,12 +81,13 @@ def extract_dest_path_from_url(tile_id): def download_and_write_one_partial(zipped_data: list) -> dict: return _download_and_write_one_tile_to_gtiff(zipped_data[0], zipped_data[1], reader, dem_name) - data_list = zip(urls, dest_paths) + # filter non existing destination path + data_list = [(u, d) for u, d in zip(urls, dest_paths) if not d.exists()] with ThreadPoolExecutor(max_workers=max_workers_for_download) as executor: list( tqdm( executor.map(download_and_write_one_partial, data_list), - total=len(urls), + total=len(data_list), desc=f'Downloading {dem_name} tiles', ) ) @@ -136,7 +137,7 @@ def get_dem_tile_paths( if dem_name in ['glo_30', 'glo_90', '3dep', 'glo_90_missing']: dem_paths = urls else: - warn(f'We need to localize the tiles as a Geotiff. Saving to {str(tile_dir)}', category=UserWarning) + warn(f"We need to localize the tiles as a Geotiff. 
Saving to {str(tile_dir)}", category=UserWarning) if (dem_name not in ['glo_30', 'glo_90', '3dep', 'glo_90_missing']) or localize_tiles_to_gtiff: if isinstance(tile_dir, str): @@ -151,9 +152,9 @@ def get_dem_tile_paths( def shift_profile_for_pixel_loc(src_profile: dict, src_area_or_point: str, dst_area_or_point: str) -> dict: - assert dst_area_or_point in ['Area', 'Point'] - assert src_area_or_point in ['Area', 'Point'] - if (dst_area_or_point == 'Point') and (src_area_or_point == 'Area'): + assert dst_area_or_point in ["Area", "Point"] + assert src_area_or_point in ["Area", "Point"] + if (dst_area_or_point == "Point") and (src_area_or_point == "Area"): shift = -0.5 profile_shifted = translate_profile(src_profile, shift, shift) elif (dst_area_or_point == 'Area') and (src_area_or_point == 'Point'): @@ -260,6 +261,7 @@ def stitch_dem( n_threads_downloading: int = 10, fill_in_glo_30: bool = True, merge_nodata_value: float = np.nan, + persistent_tile_cache_dir: Union[Path, str, None] = None, ) -> tuple[np.ndarray, dict]: """This is API for stitching DEMs. Specify bounds and various options to obtain a continuous raster. The output raster will be determined by availability of tiles. If no tiles are available over bounds, @@ -292,7 +294,8 @@ def stitch_dem( When set to np.nan (default), all areas with nodata in tiles are consistently marked in output as such. When set to 0 and converting to ellipsoidal heights, all nodata areas will be filled in with geoid. When set to 0 and not converting to ellipsoidal heights, all nodata areas will be 0. - + persistent_tile_cache_dir: Path, str, optional + If not None do not remove tiles already downloaded, and use them instead of downloading. 
Returns ------- tuple[np.ndarray, dict] @@ -313,19 +316,18 @@ def stitch_dem( fill_in_glo_30 = fill_in_glo_30 and glo_90_missing_intersection if merge_nodata_value not in [np.nan, 0]: - raise ValueError('np.nan and 0 are only acceptable merge_nodata_value') + raise ValueError("np.nan and 0 are only acceptable merge_nodata_value") # Random unique identifier tmp_id = str(uuid.uuid4()) - tile_dir = Path(f'tmp_{tmp_id}') + tile_dir = persistent_tile_cache_dir or Path(f'tmp_{tmp_id}') if dem_name in ['srtm_v3', 'nasadem']: ensure_earthdata_credentials() - dem_paths = get_dem_tile_paths( bounds=bounds, dem_name=dem_name, - localize_tiles_to_gtiff=False, + localize_tiles_to_gtiff=persistent_tile_cache_dir is not None, n_threads_downloading=n_threads_downloading, tile_dir=tile_dir, ) @@ -375,7 +377,7 @@ def stitch_dem( list(map(lambda dataset: dataset.close(), datasets)) # Delete orginal tiles if downloaded - if tile_dir.exists(): + if tile_dir.exists() and persistent_tile_cache_dir is None: shutil.rmtree(str(tile_dir)) # Created in memory file containers if there is a dateline crossing for translation From 01eb315bdfd9e0bddc45f9785212a21d4fed516f Mon Sep 17 00:00:00 2001 From: Manuel Goacolou Date: Wed, 31 Jul 2024 16:42:35 +0200 Subject: [PATCH 2/3] manage quotes --- dem_stitcher/stitcher.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dem_stitcher/stitcher.py b/dem_stitcher/stitcher.py index b32647a..7af07cd 100644 --- a/dem_stitcher/stitcher.py +++ b/dem_stitcher/stitcher.py @@ -137,7 +137,7 @@ def get_dem_tile_paths( if dem_name in ['glo_30', 'glo_90', '3dep', 'glo_90_missing']: dem_paths = urls else: - warn(f"We need to localize the tiles as a Geotiff. Saving to {str(tile_dir)}", category=UserWarning) + warn(f'We need to localize the tiles as a Geotiff. 
Saving to {str(tile_dir)}', category=UserWarning) if (dem_name not in ['glo_30', 'glo_90', '3dep', 'glo_90_missing']) or localize_tiles_to_gtiff: if isinstance(tile_dir, str): @@ -152,9 +152,9 @@ def get_dem_tile_paths( def shift_profile_for_pixel_loc(src_profile: dict, src_area_or_point: str, dst_area_or_point: str) -> dict: - assert dst_area_or_point in ["Area", "Point"] - assert src_area_or_point in ["Area", "Point"] - if (dst_area_or_point == "Point") and (src_area_or_point == "Area"): + assert dst_area_or_point in ['Area', 'Point'] + assert src_area_or_point in ['Area', 'Point'] + if (dst_area_or_point == 'Point') and (src_area_or_point == 'Area'): shift = -0.5 profile_shifted = translate_profile(src_profile, shift, shift) elif (dst_area_or_point == 'Area') and (src_area_or_point == 'Point'): @@ -316,7 +316,7 @@ def stitch_dem( fill_in_glo_30 = fill_in_glo_30 and glo_90_missing_intersection if merge_nodata_value not in [np.nan, 0]: - raise ValueError("np.nan and 0 are only acceptable merge_nodata_value") + raise ValueError('np.nan and 0 are only acceptable merge_nodata_value') # Random unique identifier tmp_id = str(uuid.uuid4()) From 0e7ff1883a83194aef863a4606672a66f5af454a Mon Sep 17 00:00:00 2001 From: Manuel Goacolou Date: Wed, 18 Sep 2024 11:04:55 +0200 Subject: [PATCH 3/3] update changelog --- CHANGELOG.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb7941d..02bb6d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/) and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [next] +### New +* add persistent cache of downloaded DEM + ## [2.5.8] ### Fixed -* Resolves read_geoid issue [here](https://github.com/ACCESS-Cloud-Based-InSAR/dem-stitcher/issues/96). 
+* Resolves read_geoid issue [here](https://github.com/ACCESS-Cloud-Based-InSAR/dem-stitcher/issues/96). * Update geoid url for egm08 (again) creating public bucket for ACCESS processing * Included egm96 as gtx in the data directory * egm08 and egm96 data comes from here: https://download.osgeo.org/proj/vdatum/ @@ -20,7 +24,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). * when no credentials in netrc are present when requesting data for `nasadem` or `srtm_v3`, there is a human readable error instructing user to update their `~/.netrc`. * Updates some ruff linting * Ensures ruff in `environment.yml` - * Ensure single quotes for consistency. + * Ensure single quotes for consistency. ### Changed * egm08 is now using 2.5 deg raster rather than 1 deg.