Skip to content

Commit

Permalink
add merged_statistics for multiBaseReader (#478)
Browse files Browse the repository at this point in the history
* add `merged_statistics` for multiBaseReader

* update changelog
  • Loading branch information
vincentsarago authored Feb 14, 2022
1 parent fc1af18 commit 90f57e7
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* add support for setting the S3 endpoint url scheme via the `AWS_HTTPS` environment variables in `aws_get_object` function using boto3 (https://github.com/cogeotiff/rio-tiler/pull/476)
* Add semicolon `;` support for multi-blocks expression (https://github.com/cogeotiff/rio-tiler/pull/479)
* add `rio_tiler.expression.get_expression_blocks` method to split expression (https://github.com/cogeotiff/rio-tiler/pull/479)
* add `merged_statistics` method for `MultiBaseReader` to get statistics using between assets expression (https://github.com/cogeotiff/rio-tiler/pull/478)

**future deprecation**

Expand Down
72 changes: 62 additions & 10 deletions docs/readers.md
Original file line number Diff line number Diff line change
Expand Up @@ -430,10 +430,11 @@ EPSG:4326

#### Methods

The `STACReader` as the same methods as the `COGReader` (defined by the BaseReader/MultiBaseReader classes).
The `STACReader` has the same methods as the `COGReader` (defined by the BaseReader/MultiBaseReader classes).

!!! important
`STACReader` methods require to set either `assets=` or `expression=` option.
!!! Important
- Most of `STACReader` methods require to set either `assets=` or `expression=` option.
- `asset_indexes` and `asset_expression` are available for all STACReader methods except `info`.

- **tile()**: Read map tile from a STAC Item

Expand Down Expand Up @@ -504,9 +505,6 @@ with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac:
assert img.count == 3
```

`asset_indexes` and `asset_expression` are available for all STACReader methods expect `info`.


- **part()**: Read a STAC item for a given bounding box (`bbox`). By default the bbox is considered to be in WGS84.

```python
Expand Down Expand Up @@ -593,21 +591,60 @@ print(info["B01"].json(exclude_none=True))
}
```

- **statistics()**: Return image statistics (Min/Max/Stdev)
- **statistics()**: Return per assets statistics (Min/Max/Stdev)

```python
with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac:
# stac.statistics(assets=?, asset_expression=?, asset_indexes=?, **kwargs)
stats = stac.statistics(assets=["B01", "B02"], max_size=128)

# stats will be in form or {"asset": {"band": BandStatistics(), ...}, ...}
print(list(info))
print(list(stats))
>>> ["B01", "B02"]

print(list(info["B01"]))
print(list(stats["B01"]))
>>> ["1"] # B01 has only one band entry "1"

print(info["B01"]["1"].json(exclude_none=True))
print(stats["B01"]["1"].json(exclude_none=True))
{
"min": 283.0,
"max": 7734.0,
"mean": 1996.959687371452,
"count": 12155.0,
"sum": 24273045.0,
"std": 1218.4455268717047,
"median": 1866.0,
"majority": 322.0,
"minority": 283.0,
"unique": 4015.0,
"histogram": [
[3257.0, 2410.0, 2804.0, 1877.0, 1050.0, 423.0, 199.0, 93.0, 31.0, 11.0],
[283.0, 1028.1, 1773.2, 2518.3, 3263.4, 4008.5, 4753.6, 5498.7, 6243.8, 6988.900000000001, 7734.0]
],
"valid_percent": 74.19,
"masked_pixels": 4229.0,
"valid_pixels": 12155.0,
"percentile_2": 326.08000000000004,
"percentile_98": 5026.76
}
```

- **merged_statistics()**: Return statistics when merging assets

The `merged_statistics` enable the use of `expression` to perform assets mixing (e.g `"asset1/asset2"`). The main difference with the `statistics` method is that we will first use the `self.preview` method to obtain a merged array and then calculate the statistics.

```python
with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac:
# stac.statistics(assets=?, asset_expression=?, asset_indexes=?, **kwargs)
stats = stac.merged_statistics(assets=["B01", "B02"], max_size=128)

# stats will be in form or {"band": BandStatistics(), ...}
print(list(stats))
>>> ["B01_1", "B02_1"]

assert isinstance(stats["B01_1"], BandStatistics)

print(info["B01_1"].json(exclude_none=True))
{
"min": 283.0,
"max": 7734.0,
Expand All @@ -629,4 +666,19 @@ print(info["B01"]["1"].json(exclude_none=True))
"percentile_2": 326.08000000000004,
"percentile_98": 5026.76
}

with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac:
# stac.statistics(assets=?, asset_expression=?, asset_indexes=?, **kwargs)
stats = stac.merged_statistics(expression=["B01/B02"], max_size=128)

print(list(stats))
>>> ["B01/B02"]

assert isinstance(stats["B01/B02"], BandStatistics)
```

### STAC Expression

When using `expression`, the reader might consider assets as `1 band` data. Expression using multi bands are not supported (e.g: `asset1_b1 + asset2_b2`).

If assets have difference number of bands and the `asset_indexes` is not specified the process will fail because it will try to apply an expression using arrays of different sizes.
64 changes: 64 additions & 0 deletions rio_tiler/io/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,70 @@ def _reader(asset: str, *args, **kwargs) -> Dict:

return multi_values(assets, _reader, **kwargs)

def merged_statistics( # type: ignore
self,
assets: Union[Sequence[str], str] = None,
expression: Optional[str] = None,
asset_indexes: Optional[Dict[str, Indexes]] = None, # Indexes for each asset
asset_expression: Optional[Dict[str, str]] = None, # Expression for each asset
categorical: bool = False,
categories: Optional[List[float]] = None,
percentiles: List[int] = [2, 98],
hist_options: Optional[Dict] = None,
max_size: int = 1024,
**kwargs: Any,
) -> Dict[str, BandStatistics]:
"""Return array statistics for multiple assets.
Args:
assets (sequence of str or str): assets to fetch info from.
expression (str, optional): rio-tiler expression for the asset list (e.g. asset1/asset2+asset3).
asset_indexes (dict, optional): Band indexes for each asset (e.g {"asset1": 1, "asset2": (1, 2,)}).
asset_expression (dict, optional): rio-tiler expression for each asset (e.g. {"asset1": "b1/b2+b3", "asset2": ...}).
categorical (bool): treat input data as categorical data. Defaults to False.
categories (list of numbers, optional): list of categories to return value for.
percentiles (list of numbers, optional): list of percentile values to calculate. Defaults to `[2, 98]`.
hist_options (dict, optional): Options to forward to numpy.histogram function.
max_size (int, optional): Limit the size of the longest dimension of the dataset read, respecting bounds X/Y aspect ratio. Defaults to 1024.
kwargs (optional): Options to forward to the `self.preview` method.
Returns:
Dict[str, rio_tiler.models.BandStatistics]: bands statistics.
"""
if not expression:
if not assets:
warnings.warn(
"No `assets` option passed, will fetch statistics for all available assets.",
UserWarning,
)
assets = assets or self.assets

data = self.preview(
assets=assets,
expression=expression,
asset_indexes=asset_indexes,
asset_expression=asset_expression,
max_size=max_size,
**kwargs,
)

hist_options = hist_options or {}

stats = get_array_statistics(
data.as_masked(),
categorical=categorical,
categories=categories,
percentiles=percentiles,
**hist_options,
)

return {
f"{data.band_names[ix]}": BandStatistics(**stats[ix])
for ix in range(len(stats))
}

def tile(
self,
tile_x: int,
Expand Down
53 changes: 53 additions & 0 deletions tests/test_io_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,59 @@ def test_statistics_valid(rio):
assert isinstance(stats["red"]["1"], BandStatistics)


@patch("rio_tiler.io.cogeo.rasterio")
def test_merged_statistics_valid(rio):
"""Should raise or return data."""
rio.open = mock_rasterio_open

with STACReader(STAC_PATH) as stac:
with pytest.warns(UserWarning):
stats = stac.merged_statistics()
assert len(stats) == 3
assert isinstance(stats["red_1"], BandStatistics)
assert stats["red_1"]
assert stats["green_1"]
assert stats["blue_1"]

with pytest.raises(InvalidAssetName):
stac.merged_statistics(assets="vert")

stats = stac.merged_statistics(assets="green")
assert isinstance(stats["green_1"], BandStatistics)

stats = stac.merged_statistics(
assets=("green", "red"), hist_options={"bins": 20}
)
assert len(stats) == 2
assert len(stats["green_1"]["histogram"][0]) == 20
assert len(stats["red_1"]["histogram"][0]) == 20

# Check that asset_expression is passed
stats = stac.merged_statistics(
assets=("green", "red"), asset_expression={"green": "b1*2", "red": "b1+100"}
)
assert isinstance(stats["green_b1*2"], BandStatistics)
assert isinstance(stats["red_b1+100"], BandStatistics)

# Check that asset_indexes is passed
stats = stac.merged_statistics(
assets=("green", "red"), asset_indexes={"green": 1, "red": 1}
)
assert isinstance(stats["green_1"], BandStatistics)
assert isinstance(stats["red_1"], BandStatistics)

# Check Expression
stats = stac.merged_statistics(expression="green/red")
assert isinstance(stats["green/red"], BandStatistics)

# Check that we can use expression and asset_expression
stats = stac.merged_statistics(
expression="green/red",
asset_expression={"green": "b1*2", "red": "b1+100"},
)
assert isinstance(stats["green/red"], BandStatistics)


@patch("rio_tiler.io.cogeo.rasterio")
def test_info_valid(rio):
"""Should raise or return data."""
Expand Down

0 comments on commit 90f57e7

Please sign in to comment.