diff --git a/CHANGES.md b/CHANGES.md index 7a1cabad..b5a0df1a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ * add support for setting the S3 endpoint url scheme via the `AWS_HTTPS` environment variables in `aws_get_object` function using boto3 (https://github.com/cogeotiff/rio-tiler/pull/476) * Add semicolon `;` support for multi-blocks expression (https://github.com/cogeotiff/rio-tiler/pull/479) * add `rio_tiler.expression.get_expression_blocks` method to split expression (https://github.com/cogeotiff/rio-tiler/pull/479) +* add `merged_statistics` method for `MultiBaseReader` to get statistics using between assets expression (https://github.com/cogeotiff/rio-tiler/pull/478) **future deprecation** diff --git a/docs/readers.md b/docs/readers.md index 7cea7e0b..b23a2f6b 100644 --- a/docs/readers.md +++ b/docs/readers.md @@ -430,10 +430,11 @@ EPSG:4326 #### Methods -The `STACReader` as the same methods as the `COGReader` (defined by the BaseReader/MultiBaseReader classes). +The `STACReader` has the same methods as the `COGReader` (defined by the BaseReader/MultiBaseReader classes). -!!! important - `STACReader` methods require to set either `assets=` or `expression=` option. +!!! Important + - Most of `STACReader` methods require to set either `assets=` or `expression=` option. + - `asset_indexes` and `asset_expression` are available for all STACReader methods except `info`. - **tile()**: Read map tile from a STAC Item @@ -504,9 +505,6 @@ with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac: assert img.count == 3 ``` -`asset_indexes` and `asset_expression` are available for all STACReader methods expect `info`. - - - **part()**: Read a STAC item for a given bounding box (`bbox`). By default the bbox is considered to be in WGS84. ```python @@ -593,7 +591,7 @@ print(info["B01"].json(exclude_none=True)) } ``` -- **statistics()**: Return image statistics (Min/Max/Stdev) +- **statistics()**: Return per assets statistics (Min/Max/Stdev) ```python with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac: @@ -601,13 +599,52 @@ with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac: stats = stac.statistics(assets=["B01", "B02"], max_size=128) # stats will be in form or {"asset": {"band": BandStatistics(), ...}, ...} -print(list(info)) +print(list(stats)) >>> ["B01", "B02"] -print(list(info["B01"])) +print(list(stats["B01"])) >>> ["1"] # B01 has only one band entry "1" -print(info["B01"]["1"].json(exclude_none=True)) +print(stats["B01"]["1"].json(exclude_none=True)) +{ + "min": 283.0, + "max": 7734.0, + "mean": 1996.959687371452, + "count": 12155.0, + "sum": 24273045.0, + "std": 1218.4455268717047, + "median": 1866.0, + "majority": 322.0, + "minority": 283.0, + "unique": 4015.0, + "histogram": [ + [3257.0, 2410.0, 2804.0, 1877.0, 1050.0, 423.0, 199.0, 93.0, 31.0, 11.0], + [283.0, 1028.1, 1773.2, 2518.3, 3263.4, 4008.5, 4753.6, 5498.7, 6243.8, 6988.900000000001, 7734.0] + ], + "valid_percent": 74.19, + "masked_pixels": 4229.0, + "valid_pixels": 12155.0, + "percentile_2": 326.08000000000004, + "percentile_98": 5026.76 +} +``` + +- **merged_statistics()**: Return statistics when merging assets + +The `merged_statistics` enable the use of `expression` to perform assets mixing (e.g `"asset1/asset2"`). The main difference with the `statistics` method is that we will first use the `self.preview` method to obtain a merged array and then calculate the statistics. + +```python +with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac: + # stac.statistics(assets=?, asset_expression=?, asset_indexes=?, **kwargs) + stats = stac.merged_statistics(assets=["B01", "B02"], max_size=128) + +# stats will be in form or {"band": BandStatistics(), ...} +print(list(stats)) +>>> ["B01_1", "B02_1"] + +assert isinstance(stats["B01_1"], BandStatistics) + +print(info["B01_1"].json(exclude_none=True)) { "min": 283.0, "max": 7734.0, @@ -629,4 +666,19 @@ print(info["B01"]["1"].json(exclude_none=True)) "percentile_2": 326.08000000000004, "percentile_98": 5026.76 } + +with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac: + # stac.statistics(assets=?, asset_expression=?, asset_indexes=?, **kwargs) + stats = stac.merged_statistics(expression=["B01/B02"], max_size=128) + +print(list(stats)) +>>> ["B01/B02"] + +assert isinstance(stats["B01/B02"], BandStatistics) ``` + +### STAC Expression + +When using `expression`, the reader might consider assets as `1 band` data. Expression using multi bands are not supported (e.g: `asset1_b1 + asset2_b2`). + +If assets have difference number of bands and the `asset_indexes` is not specified the process will fail because it will try to apply an expression using arrays of different sizes. diff --git a/rio_tiler/io/base.py b/rio_tiler/io/base.py index d523fc60..912f02a9 100644 --- a/rio_tiler/io/base.py +++ b/rio_tiler/io/base.py @@ -452,6 +452,70 @@ def _reader(asset: str, *args, **kwargs) -> Dict: return multi_values(assets, _reader, **kwargs) + def merged_statistics( # type: ignore + self, + assets: Union[Sequence[str], str] = None, + expression: Optional[str] = None, + asset_indexes: Optional[Dict[str, Indexes]] = None, # Indexes for each asset + asset_expression: Optional[Dict[str, str]] = None, # Expression for each asset + categorical: bool = False, + categories: Optional[List[float]] = None, + percentiles: List[int] = [2, 98], + hist_options: Optional[Dict] = None, + max_size: int = 1024, + **kwargs: Any, + ) -> Dict[str, BandStatistics]: + """Return array statistics for multiple assets. + + Args: + assets (sequence of str or str): assets to fetch info from. + expression (str, optional): rio-tiler expression for the asset list (e.g. asset1/asset2+asset3). + asset_indexes (dict, optional): Band indexes for each asset (e.g {"asset1": 1, "asset2": (1, 2,)}). + asset_expression (dict, optional): rio-tiler expression for each asset (e.g. {"asset1": "b1/b2+b3", "asset2": ...}). + categorical (bool): treat input data as categorical data. Defaults to False. + categories (list of numbers, optional): list of categories to return value for. + percentiles (list of numbers, optional): list of percentile values to calculate. Defaults to `[2, 98]`. + hist_options (dict, optional): Options to forward to numpy.histogram function. + max_size (int, optional): Limit the size of the longest dimension of the dataset read, respecting bounds X/Y aspect ratio. Defaults to 1024. + kwargs (optional): Options to forward to the `self.preview` method. + + + Returns: + Dict[str, rio_tiler.models.BandStatistics]: bands statistics. + + """ + if not expression: + if not assets: + warnings.warn( + "No `assets` option passed, will fetch statistics for all available assets.", + UserWarning, + ) + assets = assets or self.assets + + data = self.preview( + assets=assets, + expression=expression, + asset_indexes=asset_indexes, + asset_expression=asset_expression, + max_size=max_size, + **kwargs, + ) + + hist_options = hist_options or {} + + stats = get_array_statistics( + data.as_masked(), + categorical=categorical, + categories=categories, + percentiles=percentiles, + **hist_options, + ) + + return { + f"{data.band_names[ix]}": BandStatistics(**stats[ix]) + for ix in range(len(stats)) + } + def tile( self, tile_x: int, diff --git a/tests/test_io_stac.py b/tests/test_io_stac.py index 8d0d4a06..51bc6816 100644 --- a/tests/test_io_stac.py +++ b/tests/test_io_stac.py @@ -429,6 +429,59 @@ def test_statistics_valid(rio): assert isinstance(stats["red"]["1"], BandStatistics) +@patch("rio_tiler.io.cogeo.rasterio") +def test_merged_statistics_valid(rio): + """Should raise or return data.""" + rio.open = mock_rasterio_open + + with STACReader(STAC_PATH) as stac: + with pytest.warns(UserWarning): + stats = stac.merged_statistics() + assert len(stats) == 3 + assert isinstance(stats["red_1"], BandStatistics) + assert stats["red_1"] + assert stats["green_1"] + assert stats["blue_1"] + + with pytest.raises(InvalidAssetName): + stac.merged_statistics(assets="vert") + + stats = stac.merged_statistics(assets="green") + assert isinstance(stats["green_1"], BandStatistics) + + stats = stac.merged_statistics( + assets=("green", "red"), hist_options={"bins": 20} + ) + assert len(stats) == 2 + assert len(stats["green_1"]["histogram"][0]) == 20 + assert len(stats["red_1"]["histogram"][0]) == 20 + + # Check that asset_expression is passed + stats = stac.merged_statistics( + assets=("green", "red"), asset_expression={"green": "b1*2", "red": "b1+100"} + ) + assert isinstance(stats["green_b1*2"], BandStatistics) + assert isinstance(stats["red_b1+100"], BandStatistics) + + # Check that asset_indexes is passed + stats = stac.merged_statistics( + assets=("green", "red"), asset_indexes={"green": 1, "red": 1} + ) + assert isinstance(stats["green_1"], BandStatistics) + assert isinstance(stats["red_1"], BandStatistics) + + # Check Expression + stats = stac.merged_statistics(expression="green/red") + assert isinstance(stats["green/red"], BandStatistics) + + # Check that we can use expression and asset_expression + stats = stac.merged_statistics( + expression="green/red", + asset_expression={"green": "b1*2", "red": "b1+100"}, + ) + assert isinstance(stats["green/red"], BandStatistics) + + @patch("rio_tiler.io.cogeo.rasterio") def test_info_valid(rio): """Should raise or return data."""