diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 808b70eeb..1e416404a 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -2,6 +2,7 @@
 
 The next release must be bumped to v3.0.0.
 
+- [#899](https://github.com/IAMconsortium/pyam/pull/899) Add `to_netcdf()` method
 - [#896](https://github.com/IAMconsortium/pyam/pull/896) Add `sort_data()` method
 - [#896](https://github.com/IAMconsortium/pyam/pull/896) Sort columns of `timeseries()` with mixed time domain
 - [#893](https://github.com/IAMconsortium/pyam/pull/893) No sorting of timeseries data on initialization or append
diff --git a/pyam/core.py b/pyam/core.py
index ab8e21fe0..7aeeecf0f 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -11,6 +11,8 @@
 import pandas as pd
 from pandas.api.types import is_integer
 
+from pyam.netcdf import to_xarray
+
 try:
     from datapackage import Package
 
@@ -2513,7 +2515,7 @@ def to_datapackage(self, path):
         Parameters
         ----------
         path : string or path object
-            any valid string path or :class:`pathlib.Path`
+            Any valid string path or :class:`pathlib.Path`.
         """
         if not HAS_DATAPACKAGE:
             raise ImportError("Required package `datapackage` not found!")
@@ -2533,6 +2535,36 @@ def to_datapackage(self, path):
         # return the package (needs to reloaded because `tmp` was deleted)
         return Package(path)
 
+    def to_netcdf(self, path):
+        """Write object to a NetCDF file
+
+        Parameters
+        ----------
+        path : string or path object
+            Any valid string path or :class:`pathlib.Path`.
+
+        See Also
+        --------
+        pyam.read_netcdf
+        """
+        self.to_xarray().to_netcdf(path)
+
+    def to_xarray(self):
+        """Convert object to an :class:`xarray.Dataset`
+
+        Returns
+        -------
+        :class:`xarray.Dataset`
+
+        Notes
+        -----
+        An object with a yearly time domain is converted to a datetime domain
+        before the dataset is created.
+        """
+        df = swap_year_for_time(self) if self.time_domain == "year" else self
+        # call the module-level function imported from pyam.netcdf, not this method
+        return to_xarray(df._data, df.meta)
+
     def load_meta(self, path, sheet_name="meta", ignore_conflict=False, **kwargs):
         """Load 'meta' indicators from file
 
diff --git a/pyam/netcdf.py b/pyam/netcdf.py
index a69f963ad..cf736ab19 100644
--- a/pyam/netcdf.py
+++ b/pyam/netcdf.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pandas as pd
 
+from pyam.index import get_index_levels
+
 try:
     import xarray as xr
 
@@ -10,9 +12,10 @@
 except ModuleNotFoundError:
     xr = None
     HAS_XARRAY = False
-from pyam.core import IamDataFrame
 from pyam.utils import IAMC_IDX, META_IDX
 
+NETCDF_IDX = ["time", "model", "scenario", "region"]
+
 
 def read_netcdf(path):
     """Read timeseries data and meta indicators from a netCDF file
@@ -26,11 +29,15 @@ def read_netcdf(path):
     ----------
     :class:`IamDataFrame`
 
+    See Also
+    --------
+    pyam.IamDataFrame.to_netcdf
     """
+    from pyam import IamDataFrame
+
     if not HAS_XARRAY:
-        raise ModuleNotFoundError("Reading netcdf files requires 'xarray'")
+        raise ModuleNotFoundError("Reading netcdf files requires 'xarray'.")
     _ds = xr.open_dataset(path)
-    NETCDF_IDX = ["time", "model", "scenario", "region"]
     _list_variables = [i for i in _ds.to_dict()["data_vars"].keys()]
 
     # Check if the time coordinate is years (integers) or date time-format
@@ -86,3 +93,60 @@ def read_netcdf(path):
         data,
         meta=_ds[_meta].to_dataframe().replace("nan", np.nan) if _meta else None,
     )
+
+
+def to_xarray(data_series: pd.Series, meta: pd.DataFrame):
+    """Convert timeseries data and meta indicators to an xarray Dataset
+
+    Parameters
+    ----------
+    data_series : :class:`pandas.Series`
+        Timeseries data with a multi-index including the levels
+        'variable' and 'unit'.
+    meta : :class:`pandas.DataFrame`
+        Meta indicators; each column is added as a data-variable.
+
+    Returns
+    -------
+    :class:`xarray.Dataset`
+
+    Raises
+    ------
+    ModuleNotFoundError
+        If 'xarray' is not installed.
+    ValueError
+        If a variable has more than one unit.
+    """
+    if not HAS_XARRAY:
+        raise ModuleNotFoundError("Converting to xarray requires 'xarray'.")
+
+    dataset = xr.Dataset()
+
+    # add timeseries data-variables
+    for variable, _variable_data in data_series.groupby("variable"):
+        unit = get_index_levels(_variable_data, "unit")
+
+        if len(unit) > 1:
+            raise ValueError(
+                f"Cannot write to xarray for non-unique units in '{variable}'."
+            )
+
+        dataset[variable] = xr.DataArray(
+            _variable_data.droplevel(["variable", "unit"]).to_xarray(),
+        )
+        dataset[variable].attrs = {
+            "unit": unit[0],
+            "long_name": variable,
+        }
+
+    # add meta indicators as data-variables
+    for meta_indicator, meta_data in meta.items():
+        # store missing values as the string "nan"; `read_netcdf` converts them back
+        meta_data = meta_data.replace(np.nan, "nan")
+        dataset[meta_indicator] = xr.DataArray(
+            meta_data.to_xarray(),
+            dims=META_IDX,
+            name=meta_indicator,
+        )
+
+    return dataset
diff --git a/pyam/time.py b/pyam/time.py
index 34077d9bc..356b7d146 100644
--- a/pyam/time.py
+++ b/pyam/time.py
@@ -5,7 +5,7 @@
 from pyam.logging import raise_data_error
 
 
-def swap_time_for_year(df, inplace, subannual=False):
+def swap_time_for_year(df, inplace=False, subannual=False):
     """Internal implementation to swap 'time' domain to 'year' (as int)"""
     if not df.time_col == "time":
         raise ValueError("Time domain must be datetime to use this method")
@@ -49,7 +49,7 @@ def swap_time_for_year(df, inplace, subannual=False):
         return ret
 
 
-def swap_year_for_time(df, inplace):
+def swap_year_for_time(df, inplace=False):
     """Internal implementation to swap 'year' domain to 'time' (as datetime)"""
 
     if not df.time_col == "year":
diff --git a/tests/test_io.py b/tests/test_io.py
index 4c97d4615..8f117297d 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -307,6 +307,7 @@ def test_io_datapackage(test_df, tmpdir):
     assert_iamframe_equal(test_df, import_df)
 
 
-def test_io_netcdf(test_df_year):
-    obs = read_netcdf(TEST_DATA_DIR / "test_df.nc")
-    assert_iamframe_equal(obs, test_df_year)
+def test_io_netcdf(test_df, tmpdir):
+    file = Path(tmpdir) / "foo.nc"
+    test_df.to_netcdf(file)
+    assert_iamframe_equal(read_netcdf(file), test_df)