Skip to content

Commit

Permalink
Merge branch 'main' of github.com:natcap/pygeoprocessing into bugfix/…
Browse files Browse the repository at this point in the history
…326-expose-overview-level-in-warp-raster

Conflicts:
	HISTORY.rst
phargogh committed Aug 27, 2023
2 parents b5d7eed + 5404b55 commit a8a439e
Showing 7 changed files with 486 additions and 52 deletions.
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
@@ -16,6 +16,10 @@ Unreleased Changes
* Added a new function, ``pygeoprocessing.array_equals_nodata``, which returns
a boolean array indicating which elements have nodata. It handles integer,
float, and ``nan`` comparison, and the case where the nodata value is `None`.
* Standardized the approach used in ``warp_raster`` and
``create_raster_from_bounding_box`` for determining the dimensions of the
target raster given a target bounding box and pixel sizes.
https://github.com/natcap/pygeoprocessing/issues/321
* Users may now specify the overview level to use when calling ``warp_raster``.
By default, ``pygeoprocessing`` will use the base layer.
https://github.com/natcap/pygeoprocessing/issues/326
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
# that we can provide a much easier build experience so long as GDAL is
# available at runtime.
requires = [
'setuptools', 'wheel', 'setuptools_scm', 'cython', 'oldest-supported-numpy'
'setuptools', 'wheel', 'setuptools_scm', 'cython<3.0.0', 'oldest-supported-numpy'
]
build-backend = "setuptools.build_meta"

2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@
# --------------------
# This file records the packages and requirements needed in order for
# pygeoprocessing to work as expected.
Cython
Cython<3.0.0
GDAL>=3.0.4
numpy>=1.10.1
Rtree>=0.8.3
3 changes: 3 additions & 0 deletions src/pygeoprocessing/__init__.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,8 @@
from .geoprocessing import array_equals_nodata
from .geoprocessing import build_overviews
from .geoprocessing import calculate_disjoint_polygon_set
from .geoprocessing import choose_dtype
from .geoprocessing import choose_nodata
from .geoprocessing import convolve_2d
from .geoprocessing import create_raster_from_bounding_box
from .geoprocessing import create_raster_from_vector_extents
@@ -32,6 +34,7 @@
from .geoprocessing import new_raster_from_base
from .geoprocessing import numpy_array_to_raster
from .geoprocessing import raster_calculator
from .geoprocessing import raster_map
from .geoprocessing import raster_reduce
from .geoprocessing import raster_to_numpy_array
from .geoprocessing import rasterize
208 changes: 178 additions & 30 deletions src/pygeoprocessing/geoprocessing.py
Original file line number Diff line number Diff line change
@@ -32,12 +32,28 @@

from . import geoprocessing_core
from .geoprocessing_core import DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS
from .geoprocessing_core import DEFAULT_CREATION_OPTIONS
from .geoprocessing_core import INT8_CREATION_OPTIONS
from .geoprocessing_core import DEFAULT_OSR_AXIS_MAPPING_STRATEGY

# This is used to efficiently pass data to the raster stats worker if available
if sys.version_info >= (3, 8):
import multiprocessing.shared_memory

NUMPY_TO_GDAL_TYPE = {
numpy.dtype(bool): gdal.GDT_Byte,
numpy.dtype(numpy.int8): gdal.GDT_Byte,
numpy.dtype(numpy.uint8): gdal.GDT_Byte,
numpy.dtype(numpy.int16): gdal.GDT_Int16,
numpy.dtype(numpy.int32): gdal.GDT_Int32,
numpy.dtype(numpy.uint16): gdal.GDT_UInt16,
numpy.dtype(numpy.uint32): gdal.GDT_UInt32,
numpy.dtype(numpy.float32): gdal.GDT_Float32,
numpy.dtype(numpy.float64): gdal.GDT_Float64,
numpy.dtype(numpy.csingle): gdal.GDT_CFloat32,
numpy.dtype(numpy.complex64): gdal.GDT_CFloat64,
}


class ReclassificationMissingValuesError(Exception):
"""Raised when a raster value is not a valid key to a dictionary.
@@ -662,6 +678,128 @@ def array_equals_nodata(array, nodata):
return numpy.isclose(array, nodata, equal_nan=True)


def choose_dtype(*raster_paths):
"""
Choose an appropriate dtype for an output derived from the given inputs.
Returns the dtype with the greatest size/precision among the inputs, so
that information will not be lost.
Args:
*raster_paths: series of raster path strings
Returns:
numpy dtype
"""
dtypes = [get_raster_info(path)['numpy_type'] for path in raster_paths]
return numpy.result_type(*dtypes)


def choose_nodata(dtype):
"""
Choose an appropriate nodata value for data of a given dtype.
Args:
dtype (numpy.dtype): data type for which to choose nodata
Returns:
number to use as nodata value
"""
try:
return float(numpy.finfo(dtype).max)
except ValueError:
return int(numpy.iinfo(dtype).max)


def raster_map(op, rasters, target_path, target_nodata=None, target_dtype=None,
raster_driver_creation_tuple=DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS):
"""Apply a pixelwise function to a series of raster inputs.
The output raster will have nodata where any input raster has nodata.
Raster inputs are split into aligned blocks, and the function is
applied individually to each stack of blocks (as numpy arrays).
Args:
op (function): Function to apply to the inputs. It should accept a
number of arguments equal to the length of ``*inputs``. It should
return a numpy array with the same shape as its array input(s).
rasters (list[str]): Paths to rasters to input to ``op``, in the order
that they will be passed to ``op``. All rasters should be aligned
and have the same dimensions.
target_path (str): path to write out the output raster.
target_nodata (number): Nodata value to use for the output raster.
Optional. If not provided, a suitable nodata value will be chosen.
target_dtype (numpy.dtype): dtype to use for the output. Optional. If
not provided, a suitable dtype will be chosen.
raster_driver_creation_tuple (tuple): a tuple containing a GDAL driver
name string as the first element and a GDAL creation options
tuple/list as the second. Defaults to
geoprocessing.DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS. If the
``target_dtype`` is int8, the ``PIXELTYPE=SIGNEDBYTE`` option will
be added to the creation options tuple if it is not already there.
Returns:
``None``
"""
nodatas = []
for raster in rasters:
raster_info = get_raster_info(raster)
if raster_info['n_bands'] > 1:
LOGGER.warning(f'{raster} has more than one band. Only the first '
'band will be used.')
nodatas.append(raster_info['nodata'][0])

# choose an appropriate dtype if none was given
if target_dtype is None:
target_dtype = choose_dtype(*rasters)

# choose an appropriate nodata value if none was given
# if the user provides a target nodata,
# check that it can fit in the target dtype
if target_nodata is None:
target_nodata = choose_nodata(target_dtype)
else:
if not numpy.can_cast(target_nodata, target_dtype):
raise ValueError(
f'Target nodata value {target_nodata} is incompatible with '
f'the target dtype {target_dtype}')

driver, options = raster_driver_creation_tuple
if target_dtype == numpy.int8 and 'PIXELTYPE=SIGNEDBYTE' not in options:
options += ('PIXELTYPE=SIGNEDBYTE',)

def apply_op(*arrays):
"""Apply the function ``op`` to the input arrays.
Args:
*arrays: numpy arrays with the same shape.
Returns:
numpy array
"""
result = numpy.full(arrays[0].shape, target_nodata, dtype=target_dtype)

# make a mask that is True where all input arrays are valid,
# and False where any input array is invalid.
valid_mask = numpy.full(arrays[0].shape, True)
for array, nodata in zip(arrays, nodatas):
valid_mask &= ~array_equals_nodata(array, nodata)

# mask all arrays to the area where they all are valid
masked_arrays = [array[valid_mask] for array in arrays]
# apply op to the masked arrays in order
result[valid_mask] = op(*masked_arrays)
return result

raster_calculator(
[(path, 1) for path in rasters], # assume the first band
apply_op,
target_path,
NUMPY_TO_GDAL_TYPE[numpy.dtype(target_dtype)],
target_nodata,
raster_driver_creation_tuple=(driver, options))


def raster_reduce(function, raster_path_band, initializer, mask_nodata=True,
largest_block=_LARGEST_ITERBLOCK):
"""Cumulatively apply a reducing function to each block of a raster.
@@ -1288,17 +1426,38 @@ def create_raster_from_bounding_box(

driver = gdal.GetDriverByName(raster_driver_creation_tuple[0])
n_bands = 1
n_cols = int(numpy.ceil(
abs((bbox_maxx - bbox_minx) / target_pixel_size[0])))
n_cols = max(1, n_cols)

n_rows = int(numpy.ceil(
abs((bbox_maxy - bbox_miny) / target_pixel_size[1])))
n_rows = max(1, n_rows)
# determine the raster size that bounds the input bounding box and then
# adjust the bounding box to be that size
target_x_size = int(abs(
float(bbox_maxx - bbox_minx) / target_pixel_size[0]))
target_y_size = int(abs(
float(bbox_maxy - bbox_miny) / target_pixel_size[1]))
x_residual = (
abs(target_x_size * target_pixel_size[0]) -
(bbox_maxx - bbox_minx))
if not numpy.isclose(x_residual, 0.0):
target_x_size += 1
y_residual = (
abs(target_y_size * target_pixel_size[1]) -
(bbox_maxy - bbox_miny))
if not numpy.isclose(y_residual, 0.0):
target_y_size += 1

if target_x_size == 0:
LOGGER.warning(
"bounding_box is so small that x dimension rounds to 0; "
"clamping to 1.")
target_x_size = 1
if target_y_size == 0:
LOGGER.warning(
"bounding_box is so small that y dimension rounds to 0; "
"clamping to 1.")
target_y_size = 1

raster = driver.Create(
target_raster_path, n_cols, n_rows, n_bands, target_pixel_type,
options=raster_driver_creation_tuple[1])
target_raster_path, target_x_size, target_y_size, n_bands,
target_pixel_type, options=raster_driver_creation_tuple[1])
raster.SetProjection(target_srs_wkt)

# Set the transform based on the upper left corner and given pixel
@@ -1522,6 +1681,7 @@ def zonal_statistics(
raise RuntimeError(
f"Could not open layer {aggregate_layer_name} of {aggregate_vector_path}")


# Define the default/empty statistics values
# These values will be returned for features that have no geometry or
# don't overlap any valid pixels.
@@ -1549,6 +1709,7 @@ def default_aggregate_dict():
target_layer.CreateField(ogr.FieldDefn(fid_field_name, ogr.OFTInteger))
valid_fid_set = set()
aggregate_stats_list = [{} for _ in base_raster_path_band]
original_to_new_fid_map = {}
for feature in aggregate_layer:
fid = feature.GetFID()
# Initialize the output data structure:
@@ -1565,7 +1726,8 @@ def default_aggregate_dict():
feature_copy.SetGeometry(geom_ref.Clone())
feature_copy.SetField(fid_field_name, fid)
target_layer.CreateFeature(feature_copy)
target_layer, target_vector, feature, = None, None, None
original_to_new_fid_map[fid] = feature_copy.GetFID()
target_layer, target_vector, feature, feature_copy = None, None, None, None
geom_ref, aggregate_layer, aggregate_vector = None, None, None

# Reproject the vector to match the raster projection
@@ -1708,7 +1870,7 @@ def default_aggregate_dict():
aggregate_stats_list[i][fid]['value_counts'].update(
dict(zip(*numpy.unique(
feature_data, return_counts=True))))

fid_band, fid_raster = None, None
# Handle edge cases: features that have a geometry but do not
# overlap the center point of any pixel will not be captured by the
# method above.
@@ -1718,10 +1880,9 @@ def default_aggregate_dict():
raster_nodata = get_raster_info(raster_path)['nodata'][band - 1]
target_layer = target_vector.GetLayerByName(target_layer_id)
for unset_fid in unset_fids:
# Look up by the FID copy field, not the FID itself, because
# Look up by the new FID
# FIDs in target_layer may not be the same as in the input layer
target_layer.SetAttributeFilter(f'{fid_field_name} = {unset_fid}')
unset_feat = target_layer.GetNextFeature()
unset_feat = target_layer.GetFeature(original_to_new_fid_map[unset_fid])
unset_geom_ref = unset_feat.GetGeometryRef()

geom_x_min, geom_x_max, geom_y_min, geom_y_max = unset_geom_ref.GetEnvelope()
@@ -1798,9 +1959,8 @@ def default_aggregate_dict():
f'all done processing polygon sets for {os.path.basename(aggregate_vector_path)}')

# dereference gdal objects
data_band, data_source, fid_raster = None, None, None
disjoint_layer, aggregate_layer, aggregate_vector = None, None, None
target_layer, target_vector = None, None
data_band, data_source = None, None
disjoint_layer, target_layer, target_vector = None, None, None

shutil.rmtree(temp_working_dir)
if multi_raster_mode:
@@ -3975,23 +4135,11 @@ def numpy_array_to_raster(
Return:
None
"""
numpy_to_gdal_type = {
numpy.dtype(bool): gdal.GDT_Byte,
numpy.dtype(numpy.int8): gdal.GDT_Byte,
numpy.dtype(numpy.uint8): gdal.GDT_Byte,
numpy.dtype(numpy.int16): gdal.GDT_Int16,
numpy.dtype(numpy.int32): gdal.GDT_Int32,
numpy.dtype(numpy.uint16): gdal.GDT_UInt16,
numpy.dtype(numpy.uint32): gdal.GDT_UInt32,
numpy.dtype(numpy.float32): gdal.GDT_Float32,
numpy.dtype(numpy.float64): gdal.GDT_Float64,
numpy.dtype(numpy.csingle): gdal.GDT_CFloat32,
numpy.dtype(numpy.complex64): gdal.GDT_CFloat64,
}

raster_driver = gdal.GetDriverByName(raster_driver_creation_tuple[0])
ny, nx = base_array.shape
new_raster = raster_driver.Create(
target_path, nx, ny, 1, numpy_to_gdal_type[base_array.dtype],
target_path, nx, ny, 1, NUMPY_TO_GDAL_TYPE[base_array.dtype],
options=raster_driver_creation_tuple[1])
if projection_wkt is not None:
new_raster.SetProjection(projection_wkt)
10 changes: 7 additions & 3 deletions src/pygeoprocessing/geoprocessing_core.pyx
Original file line number Diff line number Diff line change
@@ -28,9 +28,13 @@ from osgeo import osr
import numpy
import pygeoprocessing

DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS = ('GTIFF', (
'TILED=YES', 'BIGTIFF=YES', 'COMPRESS=LZW',
'BLOCKXSIZE=256', 'BLOCKYSIZE=256'))

DEFAULT_CREATION_OPTIONS = ('TILED=YES', 'BIGTIFF=YES', 'COMPRESS=LZW',
'BLOCKXSIZE=256', 'BLOCKYSIZE=256')
INT8_CREATION_OPTIONS = DEFAULT_CREATION_OPTIONS + (
'PIXELTYPE=SIGNEDBYTE',)
DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS = ('GTIFF', DEFAULT_CREATION_OPTIONS)
INT8_GTIFF_CREATION_TUPLE_OPTIONS = ('GTIFF', INT8_CREATION_OPTIONS)

# In GDAL 3.0 spatial references no longer ignore Geographic CRS Axis Order
# and conform to Lat first, Lon Second. Transforms expect (lat, lon) order
309 changes: 292 additions & 17 deletions tests/test_geoprocessing.py

Large diffs are not rendered by default.

0 comments on commit a8a439e

Please sign in to comment.