cogeotiff · vincentsarago · Feb 14, 2022 · Feb 4, 2022 · Feb 7, 2022 · Feb 14, 2022
diff --git a/docs/readers.md b/docs/readers.md
@@ -430,10 +430,11 @@ EPSG:4326
 
 #### Methods
 
-The `STACReader` as the same methods as the `COGReader` (defined by the BaseReader/MultiBaseReader classes).
+The `STACReader` has the same methods as the `COGReader` (defined by the BaseReader/MultiBaseReader classes).
 
-!!! important
-    `STACReader` methods require to set either `assets=` or `expression=` option.
+!!! Important
+    - Most of `STACReader` methods require to set either `assets=` or `expression=` option.
+    - `asset_indexes` and `asset_expression` are available for all STACReader methods except `info`.
 
 - **tile()**: Read map tile from a STAC Item
 
@@ -504,9 +505,6 @@ with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac:
     assert img.count == 3
 ```
 
-`asset_indexes` and `asset_expression` are available for all STACReader methods expect `info`.
-
-
 - **part()**: Read a STAC item for a given bounding box (`bbox`). By default the bbox is considered to be in WGS84.
 
 ```python
@@ -593,21 +591,60 @@ print(info["B01"].json(exclude_none=True))
 }
 ```
 
-- **statistics()**: Return image statistics (Min/Max/Stdev)
+- **statistics()**: Return per assets statistics (Min/Max/Stdev)
 
 ```python
 with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac:
     # stac.statistics(assets=?, asset_expression=?, asset_indexes=?, **kwargs)
     stats = stac.statistics(assets=["B01", "B02"], max_size=128)
 
 # stats will be in form or {"asset": {"band": BandStatistics(), ...}, ...}
-print(list(info))
+print(list(stats))
 >>> ["B01", "B02"]
 
-print(list(info["B01"]))
+print(list(stats["B01"]))
 >>> ["1"]  # B01 has only one band entry "1"
 
-print(info["B01"]["1"].json(exclude_none=True))
+print(stats["B01"]["1"].json(exclude_none=True))
+{
+    "min": 283.0,
+    "max": 7734.0,
+    "mean": 1996.959687371452,
+    "count": 12155.0,
+    "sum": 24273045.0,
+    "std": 1218.4455268717047,
+    "median": 1866.0,
+    "majority": 322.0,
+    "minority": 283.0,
+    "unique": 4015.0,
+    "histogram": [
+        [3257.0, 2410.0, 2804.0, 1877.0, 1050.0, 423.0, 199.0, 93.0, 31.0, 11.0],
+        [283.0, 1028.1, 1773.2, 2518.3, 3263.4, 4008.5, 4753.6, 5498.7, 6243.8, 6988.900000000001, 7734.0]
+    ],
+    "valid_percent": 74.19,
+    "masked_pixels": 4229.0,
+    "valid_pixels": 12155.0,
+    "percentile_2": 326.08000000000004,
+    "percentile_98": 5026.76
+}
+```
+
+- **merged_statistics()**: Return statistics when merging assets
+
+The `merged_statistics` enable the use of `expression` to perform assets mixing (e.g `"asset1/asset2"`). The main difference with the `statistics` method is that we will first use the `self.preview` method to obtain a merged array and then calculate the statistics.
+
+```python
+with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac:
+    # stac.statistics(assets=?, asset_expression=?, asset_indexes=?, **kwargs)
+    stats = stac.merged_statistics(assets=["B01", "B02"], max_size=128)
+
+# stats will be in form or {"band": BandStatistics(), ...}
+print(list(stats))
+>>> ["B01_1", "B02_1"]
+
+assert isinstance(stats["B01_1"], BandStatistics)
+
+print(info["B01_1"].json(exclude_none=True))
 {
     "min": 283.0,
     "max": 7734.0,
@@ -629,4 +666,19 @@ print(info["B01"]["1"].json(exclude_none=True))
     "percentile_2": 326.08000000000004,
     "percentile_98": 5026.76
 }
+
+with STACReader(stac_url, exclude_assets={"thumbnail"},) as stac:
+    # stac.statistics(assets=?, asset_expression=?, asset_indexes=?, **kwargs)
+    stats = stac.merged_statistics(expression=["B01/B02"], max_size=128)
+
+print(list(stats))
+>>> ["B01/B02"]
+
+assert isinstance(stats["B01/B02"], BandStatistics)
 ```
+
+### STAC Expression
+
+When using `expression`, the reader might consider assets as `1 band` data. Expression using multi bands are not supported (e.g: `asset1_b1 + asset2_b2`).
+
+If assets have difference number of bands and the `asset_indexes` is not specified the process will fail because it will try to apply an expression using arrays of different sizes.
diff --git a/rio_tiler/io/base.py b/rio_tiler/io/base.py
@@ -452,6 +452,70 @@ def _reader(asset: str, *args, **kwargs) -> Dict:
 
         return multi_values(assets, _reader, **kwargs)
 
+    def merged_statistics(  # type: ignore
+        self,
+        assets: Union[Sequence[str], str] = None,
+        expression: Optional[str] = None,
+        asset_indexes: Optional[Dict[str, Indexes]] = None,  # Indexes for each asset
+        asset_expression: Optional[Dict[str, str]] = None,  # Expression for each asset
+        categorical: bool = False,
+        categories: Optional[List[float]] = None,
+        percentiles: List[int] = [2, 98],
+        hist_options: Optional[Dict] = None,
+        max_size: int = 1024,
+        **kwargs: Any,
+    ) -> Dict[str, BandStatistics]:
+        """Return array statistics for multiple assets.
+
+        Args:
+            assets (sequence of str or str): assets to fetch info from.
+            expression (str, optional): rio-tiler expression for the asset list (e.g. asset1/asset2+asset3).
+            asset_indexes (dict, optional): Band indexes for each asset (e.g {"asset1": 1, "asset2": (1, 2,)}).
+            asset_expression (dict, optional): rio-tiler expression for each asset (e.g. {"asset1": "b1/b2+b3", "asset2": ...}).
+            categorical (bool): treat input data as categorical data. Defaults to False.
+            categories (list of numbers, optional): list of categories to return value for.
+            percentiles (list of numbers, optional): list of percentile values to calculate. Defaults to `[2, 98]`.
+            hist_options (dict, optional): Options to forward to numpy.histogram function.
+            max_size (int, optional): Limit the size of the longest dimension of the dataset read, respecting bounds X/Y aspect ratio. Defaults to 1024.
+            kwargs (optional): Options to forward to the `self.preview` method.
+
+
+        Returns:
+            Dict[str, rio_tiler.models.BandStatistics]: bands statistics.
+
+        """
+        if not expression:
+            if not assets:
+                warnings.warn(
+                    "No `assets` option passed, will fetch statistics for all available assets.",
+                    UserWarning,
+                )
+            assets = assets or self.assets
+
+        data = self.preview(
+            assets=assets,
+            expression=expression,
+            asset_indexes=asset_indexes,
+            asset_expression=asset_expression,
+            max_size=max_size,
+            **kwargs,
+        )
+
+        hist_options = hist_options or {}
+
+        stats = get_array_statistics(
+            data.as_masked(),
+            categorical=categorical,
+            categories=categories,
+            percentiles=percentiles,
+            **hist_options,
+        )
+
+        return {
+            f"{data.band_names[ix]}": BandStatistics(**stats[ix])
+            for ix in range(len(stats))
+        }
+
     def tile(
         self,
         tile_x: int,

diff --git a/tests/test_io_stac.py b/tests/test_io_stac.py
@@ -429,6 +429,59 @@ def test_statistics_valid(rio):
         assert isinstance(stats["red"]["1"], BandStatistics)
 
 
+@patch("rio_tiler.io.cogeo.rasterio")
+def test_merged_statistics_valid(rio):
+    """Should raise or return data."""
+    rio.open = mock_rasterio_open
+
+    with STACReader(STAC_PATH) as stac:
+        with pytest.warns(UserWarning):
+            stats = stac.merged_statistics()
+            assert len(stats) == 3
+            assert isinstance(stats["red_1"], BandStatistics)
+            assert stats["red_1"]
+            assert stats["green_1"]
+            assert stats["blue_1"]
+
+        with pytest.raises(InvalidAssetName):
+            stac.merged_statistics(assets="vert")
+
+        stats = stac.merged_statistics(assets="green")
+        assert isinstance(stats["green_1"], BandStatistics)
+
+        stats = stac.merged_statistics(
+            assets=("green", "red"), hist_options={"bins": 20}
+        )
+        assert len(stats) == 2
+        assert len(stats["green_1"]["histogram"][0]) == 20
+        assert len(stats["red_1"]["histogram"][0]) == 20
+
+        # Check that asset_expression is passed
+        stats = stac.merged_statistics(
+            assets=("green", "red"), asset_expression={"green": "b1*2", "red": "b1+100"}
+        )
+        assert isinstance(stats["green_b1*2"], BandStatistics)
+        assert isinstance(stats["red_b1+100"], BandStatistics)
+
+        # Check that asset_indexes is passed
+        stats = stac.merged_statistics(
+            assets=("green", "red"), asset_indexes={"green": 1, "red": 1}
+        )
+        assert isinstance(stats["green_1"], BandStatistics)
+        assert isinstance(stats["red_1"], BandStatistics)
+
+        # Check Expression
+        stats = stac.merged_statistics(expression="green/red")
+        assert isinstance(stats["green/red"], BandStatistics)
+
+        # Check that we can use expression and asset_expression
+        stats = stac.merged_statistics(
+            expression="green/red",
+            asset_expression={"green": "b1*2", "red": "b1+100"},
+        )
+        assert isinstance(stats["green/red"], BandStatistics)
+
+
 @patch("rio_tiler.io.cogeo.rasterio")
 def test_info_valid(rio):
     """Should raise or return data."""