From 4412bcd8f74e8417703efcc6a4c971b28f84bc6f Mon Sep 17 00:00:00 2001
From: Jonas Hoersch <jonas.hoersch@openenergytransition.org>
Date: Fri, 1 Nov 2024 21:47:59 +0100
Subject: [PATCH] feat(wind): Bias correct wind speeds based on scaling to a
 known average

Adds a new function :py:func:`atlite.datasets.era5.retrieve_average_windspeed`
to retrieve average windspeeds. This dataset function is called by
:py:func:`atlite.wind.calculate_windspeed_bias_correction` to derive a bias
correction which can be passed to the default wind generation.

Example usage:

```python
windspeed_bias_correction = atlite.wind.calculate_windspeed_bias_correction(
    cutout,
    real_average="gwa3_250_windspeed_100m.tif",
    height=100,
)
cutout.wind(
    atlite.windturbines.Enercon_E101_3000kW,
    windspeed_bias_correction=windspeed_bias_correction,
    windspeed_height=100,
)
```
---
 atlite/convert.py       | 73 +++++++++++++++++++++++----------
 atlite/datasets/era5.py | 61 ++++++++++++++++++++++++---
 atlite/wind.py          | 91 +++++++++++++++++++++++++++++++++++------
 3 files changed, 187 insertions(+), 38 deletions(-)

diff --git a/atlite/convert.py b/atlite/convert.py
index c165cfa3..b06b62f8 100644
--- a/atlite/convert.py
+++ b/atlite/convert.py
@@ -478,19 +478,28 @@ def solar_thermal(
 
 
 # wind
-def convert_wind(ds, turbine):
+def convert_wind(
+    ds: xr.Dataset, turbine, windspeed_bias_correction=None, from_height=None
+):
     """
     Convert wind speeds for turbine to wind energy generation.
     """
     V, POW, hub_height, P = itemgetter("V", "POW", "hub_height", "P")(turbine)
 
-    wnd_hub = windm.extrapolate_wind_speed(ds, to_height=hub_height)
+    if windspeed_bias_correction is not None:
+        ds = ds.assign(
+            {f"wnd{from_height}m": ds[f"wnd{from_height}m"] * windspeed_bias_correction}
+        )
+
+    wnd_hub = windm.extrapolate_wind_speed(
+        ds, to_height=hub_height, from_height=from_height
+    )
 
-    def _interpolate(da):
+    def apply_power_curve(da):
         return np.interp(da, V, POW / P)
 
     da = xr.apply_ufunc(
-        _interpolate,
+        apply_power_curve,
         wnd_hub,
         input_core_dims=[[]],
         output_core_dims=[[]],
@@ -503,7 +512,15 @@ def _interpolate(da):
     return da
 
 
-def wind(cutout, turbine, smooth=False, add_cutout_windspeed=False, **params):
+def wind(
+    cutout,
+    turbine: str | dict,
+    smooth: bool | dict = False,
+    add_cutout_windspeed: bool = False,
+    windspeed_bias_correction: xr.DataArray | None = None,
+    windspeed_height: int | None = None,
+    **params,
+):
     """
     Generate wind generation time-series.
 
@@ -513,22 +530,32 @@ def wind(cutout, turbine, smooth=False, add_cutout_windspeed=False, **params):
     Parameters
     ----------
     turbine : str or dict
-        A turbineconfig dictionary with the keys 'hub_height' for the
-        hub height and 'V', 'POW' defining the power curve.
-        Alternatively a str refering to a local or remote turbine configuration
-        as accepted by atlite.resource.get_windturbineconfig(). Locally stored turbine
-        configurations can also be modified with this function. E.g. to setup a different hub
-        height from the one used in the yaml file,one would write
-                "turbine=get_windturbineconfig(“NREL_ReferenceTurbine_5MW_offshore”)|{“hub_height”:120}"
+        A turbineconfig dictionary with the keys 'hub_height' for the hub height
+        and 'V', 'POW' defining the power curve. Alternatively a str refering to
+        a local or remote turbine configuration as accepted by
+        atlite.resource.get_windturbineconfig(). Locally stored turbine
+        configurations can also be modified with this function. E.g. to setup a
+        different hub height from the one used in the yaml file, one would write
+        >>> turbine = (
+        >>>     get_windturbineconfig("NREL_ReferenceTurbine_5MW_offshore")
+        >>>     | {"hub_height":120}
+        >>> )
     smooth : bool or dict
-        If True smooth power curve with a gaussian kernel as
-        determined for the Danish wind fleet to Delta_v = 1.27 and
-        sigma = 2.29. A dict allows to tune these values.
+        If True smooth power curve with a gaussian kernel as determined for the
+        Danish wind fleet to Delta_v = 1.27 and sigma = 2.29. A dict allows to
+        tune these values.
     add_cutout_windspeed : bool
-        If True and in case the power curve does not end with a zero, will add zero power
-        output at the highest wind speed in the power curve. If False, a warning will be
-        raised if the power curve does not have a cut-out wind speed. The default is
-        False.
+        If True and in case the power curve does not end with a zero, will add
+        zero power output at the highest wind speed in the power curve. If
+        False, a warning will be raised if the power curve does not have a
+        cut-out wind speed. The default is False.
+    windspeed_bias_correction : DataArray, optional
+        Correction factor that is applied to the windspeed at
+        `windspeed_height`. Such a correction factor can be calculated using
+        :py:func:`atlite.wind.calculate_windspeed_bias_correction` with a raster
+        dataset of mean wind speeds.
+    windspeed_height : int, optional
+        Height in meters of windspeed data from which to extrapolate
 
     Note
     ----
@@ -541,7 +568,7 @@ def wind(cutout, turbine, smooth=False, add_cutout_windspeed=False, **params):
     1074 – 1088. doi:10.1016/j.energy.2015.09.071
 
     """
-    if isinstance(turbine, (str, Path, dict)):
+    if isinstance(turbine, str | Path | dict):
         turbine = get_windturbineconfig(
             turbine, add_cutout_windspeed=add_cutout_windspeed
         )
@@ -550,7 +577,11 @@ def wind(cutout, turbine, smooth=False, add_cutout_windspeed=False, **params):
         turbine = windturbine_smooth(turbine, params=smooth)
 
     return cutout.convert_and_aggregate(
-        convert_func=convert_wind, turbine=turbine, **params
+        convert_func=convert_wind,
+        turbine=turbine,
+        windspeed_bias_correction=windspeed_bias_correction,
+        from_height=windspeed_height,
+        **params,
     )
 
 
diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py
index 786e31b4..7b063358 100644
--- a/atlite/datasets/era5.py
+++ b/atlite/datasets/era5.py
@@ -8,6 +8,7 @@
 https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation
 """
 
+import datetime
 import logging
 import os
 import warnings
@@ -323,7 +324,7 @@ def noisy_unlink(path):
         logger.error(f"Unable to delete file {path}, as it is still in use.")
 
 
-def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates):
+def retrieve_data(dataset, chunks=None, tmpdir=None, lock=None, **updates):
     """
     Download data like ERA5 from the Climate Data Store (CDS).
 
@@ -340,7 +341,7 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates):
     client = cdsapi.Client(
         info_callback=logger.debug, debug=logging.DEBUG >= logging.root.level
     )
-    result = client.retrieve(product, request)
+    result = client.retrieve(dataset, request)
 
     if lock is None:
         lock = nullcontext()
@@ -359,7 +360,7 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates):
     ds = xr.open_dataset(target, chunks=chunks or {})
     if tmpdir is None:
         logger.debug(f"Adding finalizer for {target}")
-        weakref.finalize(ds._file_obj._manager, noisy_unlink, target)
+        weakref.finalize(ds._close.__self__.ds, noisy_unlink, target)
 
     # Remove default encoding we get from CDSAPI, which can lead to NaN values after loading with subsequent
     # saving due to how xarray handles netcdf compression (only float encoded as short int seem affected)
@@ -373,6 +374,55 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates):
     return ds
 
 
+def retrieve_windspeed_average(cutout, height, first_year=1980, last_year=None):
+    """
+    Retrieve average windspeed from `first_year` to `last_year`
+
+    Parameters
+    ----------
+    cutout : atlite.Cutout
+        Cutout for which to retrieve windspeeds from CDS
+    height : int
+        Height of windspeeds (ERA5 typically knows about 10m, 100m, 150m?)
+    first_year : int
+        First year to take into account
+    last_year : int, optional
+        Last year to take into account (if omitted takes the previous year)
+
+    Returns
+    -------
+    DataArray
+        Mean windspeed at cutout dimension
+    """
+    if last_year is None:
+        last_year = datetime.date.today().year - 1
+
+    ds = retrieve_data(
+        "reanalysis-era5-single-levels-monthly-means",
+        chunks=cutout.chunks,
+        product_type="monthly_averaged_reanalysis",
+        variable=[
+            f"{height}m_u_component_of_wind",
+            f"{height}m_v_component_of_wind",
+        ],
+        area=_area(cutout.coords),
+        grid=[cutout.dx, cutout.dy],
+        year=[str(y) for y in range(first_year, last_year + 1)],
+        month=[f"{m:02}" for m in range(1, 12 + 1)],
+        time=["00:00"],
+    )
+    ds = _rename_and_clean_coords(ds)
+
+    return (
+        sqrt(ds[f"u{height}"] ** 2 + ds[f"v{height}"] ** 2)
+        .mean("date")
+        .assign_attrs(
+            units=ds[f"u{height}"].attrs["units"],
+            long_name=f"{height} metre wind speed as long run average",
+        )
+    )
+
+
 def get_data(
     cutout,
     feature,
@@ -418,7 +468,8 @@ def get_data(
     sanitize = creation_parameters.get("sanitize", True)
 
     retrieval_params = {
-        "product": "reanalysis-era5-single-levels",
+        "dataset": "reanalysis-era5-single-levels",
+        "product_type": "reanalysis",
         "area": _area(coords),
         "chunks": cutout.chunks,
         "grid": [cutout.dx, cutout.dy],
@@ -431,7 +482,7 @@ def get_data(
 
     logger.info(f"Requesting data for feature {feature}...")
 
-    def retrieve_once(time):
+    def retrieve_once(time, longrunaverage=False):
         ds = func({**retrieval_params, **time})
         if sanitize and sanitize_func is not None:
             ds = sanitize_func(ds)
diff --git a/atlite/wind.py b/atlite/wind.py
index 07f17d51..736f6b46 100644
--- a/atlite/wind.py
+++ b/atlite/wind.py
@@ -7,13 +7,20 @@
 
 import logging
 import re
+from pathlib import Path
 
 import numpy as np
+import rasterio as rio
+import xarray as xr
+
+from . import datasets
 
 logger = logging.getLogger(__name__)
 
 
-def extrapolate_wind_speed(ds, to_height, from_height=None):
+def extrapolate_wind_speed(
+    ds: xr.Dataset, to_height: int | float, from_height: int | None = None
+):
     """
     Extrapolate the wind speed from a given height above ground to another.
 
@@ -28,8 +35,7 @@ def extrapolate_wind_speed(ds, to_height, from_height=None):
         Dataset containing the wind speed time-series at 'from_height' with key
         'wnd{height:d}m' and the surface orography with key 'roughness' at the
         geographic locations of the wind speeds.
-    from_height : int
-        (Optional)
+    from_height : int, optional
         Height (m) from which the wind speeds are interpolated to 'to_height'.
         If not provided, the closest height to 'to_height' is selected.
     to_height : int|float
@@ -51,14 +57,11 @@ def extrapolate_wind_speed(ds, to_height, from_height=None):
     Retrieved 2019-02-15.
 
     """
-    # Fast lane
-    to_name = f"wnd{int(to_height):0d}m"
-    if to_name in ds:
-        return ds[to_name]
-
     if from_height is None:
         # Determine closest height to to_name
-        heights = np.asarray([int(s[3:-1]) for s in ds if re.match(r"wnd\d+m", s)])
+        heights = np.asarray(
+            [int(m.group(1)) for s in ds if (m := re.match(r"wnd(\d+)m", s))]
+        )
 
         if len(heights) == 0:
             raise AssertionError("Wind speed is not in dataset")
@@ -67,17 +70,81 @@ def extrapolate_wind_speed(ds, to_height, from_height=None):
 
     from_name = f"wnd{int(from_height):0d}m"
 
+    # Fast lane
+    if from_height == to_height:
+        return ds[from_name]
+
     # Wind speed extrapolation
     wnd_spd = ds[from_name] * (
         np.log(to_height / ds["roughness"]) / np.log(from_height / ds["roughness"])
     )
 
-    wnd_spd.attrs.update(
+    return wnd_spd.assign_attrs(
         {
             "long name": f"extrapolated {to_height} m wind speed using logarithmic "
             f"method with roughness and {from_height} m wind speed",
             "units": "m s**-1",
         }
-    )
+    ).rename(f"wnd{to_height}m")
+
+
+def calculate_windspeed_bias_correction(
+    cutout, real_average: str | rio.DatasetReader, height: int = 100
+):
+    """
+    Derive a bias correction factor for windspeed at lra_height
+
+    Regrids the raster dataset in real_average to cutout grid, retrieves the average
+    windspeed from the first dataset that offers
+    :py:func:`retrieve_longrunaverage_windspeed` (only ERA5, currently).
+
+    Parameters
+    ----------
+    cutout : Cutout
+        Atlite cutout
+    real_average : Path or rasterio.Dataset
+        Raster dataset with wind speeds to bias correct average wind speeds
+    height : int
+        Height in meters at which average windspeeds are provided
 
-    return wnd_spd.rename(to_name)
+    Returns
+    -------
+    DataArray
+        Ratio between windspeeds in `real_average` to those of average windspeeds in
+        dataset.
+    """
+    if isinstance(real_average, str | Path):
+        real_average = rio.open(real_average)
+
+    if isinstance(real_average, rio.DatasetReader):
+        real_average = rio.band(real_average, 1)
+
+    if isinstance(real_average, rio.Band):
+        real_average, transform = rio.warp.reproject(
+            real_average,
+            np.empty(cutout.shape),
+            dst_crs=cutout.crs,
+            dst_transform=cutout.transform,
+            dst_nodata=np.nan,
+            resampling=rio.enums.Resampling.average,
+        )
+
+        real_average = xr.DataArray(
+            real_average, [cutout.coords["y"], cutout.coords["x"]]
+        )
+
+    for module in np.atleast_1d(cutout.module):
+        retrieve_windspeed_average = getattr(
+            getattr(datasets, module), "retrieve_windspeed_average"
+        )
+        if retrieve_windspeed_average is not None:
+            break
+    else:
+        raise AssertionError(
+            "None of the datasets modules define retrieve_windspeed_average"
+        )
+
+    logger.info("Retrieving average windspeeds at %d from module %s", height, module)
+    data_average = retrieve_windspeed_average(cutout, height)
+
+    return real_average / data_average