From 9260658a3cf6b0e2019983c5ea349e1ee2d6ac97 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Tue, 28 May 2019 09:26:38 -0300 Subject: [PATCH 1/8] Add chunks and **kwargs args to fetch_bedmap2 --- rockhound/bedmap2.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/rockhound/bedmap2.py b/rockhound/bedmap2.py index 865df880..7355ac1a 100644 --- a/rockhound/bedmap2.py +++ b/rockhound/bedmap2.py @@ -24,7 +24,7 @@ } -def fetch_bedmap2(datasets, *, load=True): +def fetch_bedmap2(datasets, *, load=True, chunks=100, **kwargs): """ Fetch the Bedmap2 datasets for Antarctica. @@ -70,6 +70,12 @@ def fetch_bedmap2(datasets, *, load=True): Wether to load the data into an :class:`xarray.Dataset` or just return the path to the downloaded data tiff files. If False, will return a list with the paths to the files corresponding to *datasets*. + chunks : int, tuple or dict + Chunk sizes along each dimension. This argument is passed to the + :func:`xarray.open_rasterio` function in order to return a Dask array. + This helps to read the dataset without loading it entirely into memory. + kwargs : dict + Extra parameters passed to the :func:`xarray.open_rasterio` function. 
Returns ------- @@ -88,7 +94,7 @@ def fetch_bedmap2(datasets, *, load=True): return [get_fname(dataset, fnames) for dataset in datasets] arrays = [] for dataset in datasets: - array = xr.open_rasterio(get_fname(dataset, fnames)) + array = xr.open_rasterio(get_fname(dataset, fnames), chunks=chunks, **kwargs) # Replace no data values with nans array = array.where(array != array.nodatavals) # Remove "band" dimension and coordinate From 16fc47db53d11fa0bd6c762c9ed64cff961c48e1 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Tue, 28 May 2019 09:49:08 -0300 Subject: [PATCH 2/8] Improve Bedmap2 docstring --- rockhound/bedmap2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rockhound/bedmap2.py b/rockhound/bedmap2.py index 7355ac1a..35916cd7 100644 --- a/rockhound/bedmap2.py +++ b/rockhound/bedmap2.py @@ -72,7 +72,8 @@ def fetch_bedmap2(datasets, *, load=True, chunks=100, **kwargs): paths to the files corresponding to *datasets*. chunks : int, tuple or dict Chunk sizes along each dimension. This argument is passed to the - :func:`xarray.open_rasterio` function in order to return a Dask array. + :func:`xarray.open_rasterio` function in order to obtain Dask arrays inside the + returned :class:`xarray.Dataset`. This helps to read the dataset without loading it entirely into memory. kwargs : dict Extra parameters passed to the :func:`xarray.open_rasterio` function. From 0d26052c1ec39ccc84a5d9fa05bfd402673b390e Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Tue, 28 May 2019 10:16:29 -0300 Subject: [PATCH 3/8] Add url to Dask array on fetch_bedmap2 docstring --- rockhound/bedmap2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rockhound/bedmap2.py b/rockhound/bedmap2.py index 35916cd7..6db2ae23 100644 --- a/rockhound/bedmap2.py +++ b/rockhound/bedmap2.py @@ -72,7 +72,8 @@ def fetch_bedmap2(datasets, *, load=True, chunks=100, **kwargs): paths to the files corresponding to *datasets*. 
chunks : int, tuple or dict Chunk sizes along each dimension. This argument is passed to the - :func:`xarray.open_rasterio` function in order to obtain Dask arrays inside the + :func:`xarray.open_rasterio` function in order to obtain + `Dask arrays <https://docs.dask.org/en/latest/array.html>`_ inside the returned :class:`xarray.Dataset`. This helps to read the dataset without loading it entirely into memory. kwargs : dict From 6d01e5d458fb18e3d51c6392ea867e2d174802bb Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Tue, 28 May 2019 10:36:58 -0300 Subject: [PATCH 4/8] Add dask as a dependency --- doc/install.rst | 1 + environment.yml | 1 + requirements.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/doc/install.rst b/doc/install.rst index 9831ca4f..25df62f5 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -23,6 +23,7 @@ Dependencies * `xarray `__ * `pandas `__ * `rasterio `__ +* `dask `__ Most of the examples in the :ref:`gallery` also use: diff --git a/environment.yml b/environment.yml index f42dc853..db830d26 100644 --- a/environment.yml +++ b/environment.yml @@ -9,6 +9,7 @@ dependencies: - xarray - pandas - rasterio + - dask # Development requirements - matplotlib - cmocean diff --git a/requirements.txt b/requirements.txt index 7adef2e1..26e0598c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ pooch>=0.5 xarray pandas rasterio +dask From 7fc4de74534c6388cd3a96befb5e0bb96b50c081 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Wed, 29 May 2019 13:32:40 -0300 Subject: [PATCH 5/8] Change kwargs parameter in docstring Co-Authored-By: Leonardo Uieda --- rockhound/bedmap2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rockhound/bedmap2.py b/rockhound/bedmap2.py index 6db2ae23..bd6e08a1 100644 --- a/rockhound/bedmap2.py +++ b/rockhound/bedmap2.py @@ -76,7 +76,7 @@ def fetch_bedmap2(datasets, *, load=True, chunks=100, **kwargs): `Dask arrays <https://docs.dask.org/en/latest/array.html>`_ inside the returned :class:`xarray.Dataset`. 
This helps to read the dataset without loading it entirely into memory. - kwargs : dict + **kwargs Extra parameters passed to the :func:`xarray.open_rasterio` function. Returns From e7909770a74084f816317a49b67358e2da7498a5 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Wed, 29 May 2019 13:41:06 -0300 Subject: [PATCH 6/8] Change default chunks size on fetch_bedmap2 --- rockhound/bedmap2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rockhound/bedmap2.py b/rockhound/bedmap2.py index bd6e08a1..20ee2ff6 100644 --- a/rockhound/bedmap2.py +++ b/rockhound/bedmap2.py @@ -24,7 +24,7 @@ } -def fetch_bedmap2(datasets, *, load=True, chunks=100, **kwargs): +def fetch_bedmap2(datasets, *, load=True, chunks=1000, **kwargs): """ Fetch the Bedmap2 datasets for Antarctica. From 5b5763bdc2604a5c4d9b61474a0bca0af97bb612 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Wed, 29 May 2019 13:46:11 -0300 Subject: [PATCH 7/8] Update memory warning --- rockhound/bedmap2.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rockhound/bedmap2.py b/rockhound/bedmap2.py index 20ee2ff6..d767de91 100644 --- a/rockhound/bedmap2.py +++ b/rockhound/bedmap2.py @@ -55,8 +55,11 @@ def fetch_bedmap2(datasets, *, load=True, chunks=1000, **kwargs): relative to EIGEN-GL04C geoid (to convert back to WGS84, add this grid) .. warning :: - Loading a great number of datasets may require a fair amount of memory that - could crash your system. We recommend loading only the needed datasets. + Loading datasets into memory may require a fair amount of memory. + In order to prevent it the function loads the datasets as Dask arrays if + ``chunks`` is not ``None``. + Be careful when doing operations that load the entire datasets into memory, + like plotting or performing some computations. .. 
warning :: Loading any dataset along with ``thickness_uncertainty_5km`` would modify the From b5c41be6e280cb9a6087bd893b88d21645100da0 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Wed, 5 Jun 2019 10:39:06 -0300 Subject: [PATCH 8/8] Fix docstring Co-Authored-By: Leonardo Uieda --- rockhound/bedmap2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rockhound/bedmap2.py b/rockhound/bedmap2.py index d767de91..6c4f2eb6 100644 --- a/rockhound/bedmap2.py +++ b/rockhound/bedmap2.py @@ -56,7 +56,7 @@ def fetch_bedmap2(datasets, *, load=True, chunks=1000, **kwargs): .. warning :: Loading datasets into memory may require a fair amount of memory. - In order to prevent it the function loads the datasets as Dask arrays if + In order to prevent this, the function loads the datasets as Dask arrays if ``chunks`` is not ``None``. Be careful when doing operations that load the entire datasets into memory, like plotting or performing some computations.