diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 8eae144c3..f1d5eeeb1 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,8 @@ -# Next Release +# Next release +The next release must be bumped to v3.0.0. + +- [#893](https://github.com/IAMconsortium/pyam/pull/893) No sorting of timeseries data on initialization or append - [#879](https://github.com/IAMconsortium/pyam/pull/879) Add `read_netcdf()` function # Release v2.3.0 diff --git a/pyam/core.py b/pyam/core.py index cfa0e9a5d..d984c2585 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -603,7 +603,7 @@ def append( # merge extra columns in `data` ret.extra_cols += [i for i in other.extra_cols if i not in ret.extra_cols] - ret._data = _data.sort_index() + ret._data = _data ret._set_attributes() if not inplace: @@ -805,17 +805,19 @@ def timeseries(self, iamc_index=False): reducing to IAMC-index yields an index with duplicates """ if self.empty: - raise ValueError("This IamDataFrame is empty!") + raise ValueError("This IamDataFrame is empty.") s = self._data if iamc_index: if self.time_col == "time": raise ValueError( - "Cannot use `iamc_index=True` with 'datetime' time-domain!" + "Cannot use `iamc_index=True` with 'datetime' time-domain." 
) s = s.droplevel(self.extra_cols) - return s.unstack(level=self.time_col).rename_axis(None, axis=1) + return ( + s.unstack(level=self.time_col).sort_index(axis=1).rename_axis(None, axis=1) + ) def set_meta(self, meta, name=None, index=None): # noqa: C901 """Add meta indicators as pandas.Series, list or value (int/float/str) diff --git a/pyam/utils.py b/pyam/utils.py index 20571b306..8b1810dcf 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -433,7 +433,7 @@ def format_data(df, index, **kwargs): # noqa: C901 if df.empty: logger.warning("Formatted data is empty.") - return df.sort_index(), index, time_col, extra_cols + return df, index, time_col, extra_cols def _validate_complete_index(df): diff --git a/tests/test_core.py b/tests/test_core.py index 9803e0570..f6a220c98 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -11,6 +11,8 @@ from pyam.core import _meta_idx from pyam.utils import IAMC_IDX, META_IDX +from .conftest import TEST_DF + df_filter_by_meta_matching_idx = pd.DataFrame( [ ["model_a", "scen_a", "region_1", 1], @@ -42,7 +44,7 @@ @pytest.mark.parametrize("index", (None, META_IDX, ["model"])) -def test_init_df(test_pd_df, index): +def test_init_df_with_non_default_index(test_pd_df, index): """Casting to IamDataFrame and returning as `timeseries()` yields original frame""" # set a value to `nan` to check that timeseries columns are ordered correctly @@ -54,6 +56,19 @@ def test_init_df(test_pd_df, index): pdt.assert_frame_equal(obs, test_pd_df.set_index(IAMC_IDX), check_column_type=False) +def test_init_df_unsorted(test_pd_df): + """Casting unsorted timeseries data does not sort on init""" + + columns = IAMC_IDX + list(test_pd_df.columns[[6, 5]]) + unsorted_data = test_pd_df.iloc[[2, 0, 1]][columns] + df = IamDataFrame(unsorted_data) + + # `data` is not sorted + assert list(df.data.scenario.unique()) == ["scen_b", "scen_a"] + assert list(df.data.year.unique()) == [2010, 2005] + assert not df._data.index.is_monotonic_increasing + + def 
test_init_from_iamdf(test_df_year): # casting an IamDataFrame instance again works df = IamDataFrame(test_df_year) @@ -510,30 +525,56 @@ def test_variable_depth_with_list_raises(test_df, filter_name): pytest.raises(ValueError, test_df.filter, **{filter_name: [1, 2]}) -def test_timeseries(test_df): - dct = { - "model": ["model_a"] * 2, - "scenario": ["scen_a"] * 2, - "years": [2005, 2010], - "value": [1, 6], - } - exp = pd.DataFrame(dct).pivot_table( - index=["model", "scenario"], columns=["years"], values="value" - ) - obs = test_df.filter(scenario="scen_a", variable="Primary Energy").timeseries() - npt.assert_array_equal(obs, exp) +@pytest.mark.parametrize("unsort", [False, True]) +def test_timeseries(test_df, unsort): + """Assert that the timeseries is shown as expected even from unordered data""" + exp = TEST_DF.set_index(IAMC_IDX) + + if unsort: + # revert order of _data, then check that the index and columns are sorted anyway + data = test_df.data + if test_df.time_col == "time": + time = test_df.time + data.time = data.time.replace( + {year: time[i] for i, year in enumerate([2005, 2010])} + ) + test_df = IamDataFrame(data.iloc[[5, 4, 3, 2, 1, 0]]) + # check that `data` is not sorted internally + unsorted_data = test_df.data + assert list(unsorted_data.scenario.unique()) == ["scen_b", "scen_a"] + if test_df.time_col == "year": + time = unsorted_data.year.unique() + else: + time = unsorted_data.time.unique() + assert time[0] > time[1] + + if test_df.time_col == "time": + exp.columns = test_df.time + exp.columns.name = None + + obs = test_df.timeseries() + pdt.assert_frame_equal(obs, exp, check_column_type=False) + + +def test_timeseries_wide_unsorted(test_pd_df): + """Assert that the timeseries is shown as expected even from unordered data""" + + # for some reason, `unstack` behaves differently if columns or rows are not sorted + exp = test_pd_df.set_index(IAMC_IDX) + obs = IamDataFrame(test_pd_df[IAMC_IDX + [2010, 2005]]).timeseries() + 
pdt.assert_frame_equal(obs, exp, check_column_type=False) def test_timeseries_empty_raises(test_df_year): """Calling `timeseries()` on an empty IamDataFrame raises""" _df = test_df_year.filter(model="foo") - with pytest.raises(ValueError, match="This IamDataFrame is empty!"): + with pytest.raises(ValueError, match="This IamDataFrame is empty."): _df.timeseries() def test_timeseries_time_iamc_raises(test_df_time): """Calling `timeseries(iamc_index=True)` on a continuous-time IamDataFrame raises""" - match = "Cannot use `iamc_index=True` with 'datetime' time-domain!" + match = "Cannot use `iamc_index=True` with 'datetime' time-domain." with pytest.raises(ValueError, match=match): test_df_time.timeseries(iamc_index=True) diff --git a/tests/test_feature_aggregate.py b/tests/test_feature_aggregate.py index 0464bba1e..fd9eb409f 100644 --- a/tests/test_feature_aggregate.py +++ b/tests/test_feature_aggregate.py @@ -145,7 +145,7 @@ def test_aggregate_skip_intermediate(recursive_df): # make the data inconsistent, check (and then skip) validation recursive_df._data.iloc[0] = recursive_df._data.iloc[0] + 2 - recursive_df._data.iloc[3] = recursive_df._data.iloc[3] + 2 + recursive_df._data.iloc[2] = recursive_df._data.iloc[2] + 2 # create object without variables to be aggregated, but with intermediate variables v = "Secondary Energy|Electricity" @@ -324,7 +324,7 @@ def test_aggregate_region_with_negative_weights(simple_df, caplog): # dropping negative weights works as expected neg_weights_df = simple_df.copy() - neg_weights_df._data.iloc[18] = -6 + neg_weights_df._data.iloc[26] = -6 exp = simple_df.filter(variable=v, region="World", year=2010) assert_iamframe_equal(neg_weights_df.aggregate_region(v, weight=w), exp) diff --git a/tests/test_feature_append_concat.py b/tests/test_feature_append_concat.py index bfbeab97a..35c1f74f1 100644 --- a/tests/test_feature_append_concat.py +++ b/tests/test_feature_append_concat.py @@ -98,8 +98,8 @@ def test_concat(test_df, reverse, iterable): 
assert test_df.scenario == ["scen_a", "scen_b"] assert other.scenario == ["scen_c"] - # assert that merging of meta works as expected (reorder columns) - pdt.assert_frame_equal(result.meta[EXP_META.columns], EXP_META) + # assert that merging of meta works as expected + pdt.assert_frame_equal(result.meta[EXP_META.columns], EXP_META, check_like=True) # assert that appending data works as expected ts = result.timeseries() @@ -188,7 +188,31 @@ def test_concat_all_pd_dataframe(test_df): npt.assert_array_equal(ts.iloc[2].values, ts.iloc[3].values) -def test_append(test_df): +@pytest.mark.parametrize("inplace", (True, False)) +def test_append_data_not_sorted(test_pd_df, inplace): + """Appending timeseries data does not sort""" + + columns = IAMC_IDX + list(test_pd_df.columns[[6, 5]]) + unsorted_data = test_pd_df.iloc[[2, 1]][columns] + df = IamDataFrame(unsorted_data) + + if inplace: + obs = df.copy() + obs.append(test_pd_df.iloc[[0]], inplace=True) + else: + obs = df.append(test_pd_df.iloc[[0]]) + # assert that original object was not modified + assert len(df._data) == 4 + + # `data` is not sorted, only applies to pandas >= 2.2 + # TODO remove this if-statement when dropping support for pandas < 2.2 + if tuple(int(x) for x in pd.__version__.split(".")[:2]) >= (2, 2): + assert list(obs.data.scenario.unique()) == ["scen_b", "scen_a"] + assert list(obs.data.year.unique()) == [2010, 2005] + assert not obs._data.index.is_monotonic_increasing + + +def test_append_meta(test_df): other = test_df.filter(scenario="scen_b").rename({"scenario": {"scen_b": "scen_c"}}) test_df.set_meta([0, 1], name="col1") diff --git a/tests/test_iiasa.py b/tests/test_iiasa.py index 9e72dd9b8..810f42f4d 100644 --- a/tests/test_iiasa.py +++ b/tests/test_iiasa.py @@ -11,7 +11,7 @@ from pyam import IamDataFrame, iiasa, lazy_read_iiasa, read_iiasa from pyam.testing import assert_iamframe_equal -from pyam.utils import META_IDX +from pyam.utils import IAMC_IDX, META_IDX from .conftest import IIASA_UNAVAILABLE, META_COLS, TEST_API, TEST_API_NAME 
@@ -375,7 +375,9 @@ def test_lazy_read(tmpdir): assert df.model == ["model_a"] # This is read from the file, so the filter is not applied. df2 = lazy_read_iiasa(tmp_file, TEST_API) - assert df.data.equals(df2.data) + assert ( + df.data.sort_values(by=IAMC_IDX).reset_index(drop=True).equals(df2.data) + ) # If requesting with an inconsistent filter, get nothing back. Strings and filters # work interchangably. tmp_file = str(tmp_file)