IAMconsortium · danielhuppmann · Dec 16, 2024 · Dec 8, 2024 · Dec 8, 2024 · Dec 8, 2024
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -1,5 +1,8 @@
-# Next Release
+# Next release
 
+The next release must be bumped to v3.0.0.
+
+- [#893](https://github.com/IAMconsortium/pyam/pull/893) No sorting of timeseries data on initialization or append
 - [#879](https://github.com/IAMconsortium/pyam/pull/879) Add `read_netcdf()` function
 
 # Release v2.3.0

diff --git a/pyam/core.py b/pyam/core.py
@@ -603,7 +603,7 @@ def append(
 
         # merge extra columns in `data`
         ret.extra_cols += [i for i in other.extra_cols if i not in ret.extra_cols]
-        ret._data = _data.sort_index()
+        ret._data = _data
         ret._set_attributes()
 
         if not inplace:
@@ -805,17 +805,19 @@ def timeseries(self, iamc_index=False):
             reducing to IAMC-index yields an index with duplicates
         """
         if self.empty:
-            raise ValueError("This IamDataFrame is empty!")
+            raise ValueError("This IamDataFrame is empty.")
 
         s = self._data
         if iamc_index:
             if self.time_col == "time":
                 raise ValueError(
-                    "Cannot use `iamc_index=True` with 'datetime' time-domain!"
+                    "Cannot use `iamc_index=True` with 'datetime' time-domain."
                 )
             s = s.droplevel(self.extra_cols)
 
-        return s.unstack(level=self.time_col).rename_axis(None, axis=1)
+        return (
+            s.unstack(level=self.time_col).sort_index(axis=1).rename_axis(None, axis=1)
+        )
 
     def set_meta(self, meta, name=None, index=None):  # noqa: C901
         """Add meta indicators as pandas.Series, list or value (int/float/str)

diff --git a/pyam/utils.py b/pyam/utils.py
@@ -433,7 +433,7 @@ def format_data(df, index, **kwargs):  # noqa: C901
     if df.empty:
         logger.warning("Formatted data is empty.")
 
-    return df.sort_index(), index, time_col, extra_cols
+    return df, index, time_col, extra_cols
 
 
 def _validate_complete_index(df):

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -11,6 +11,8 @@
 from pyam.core import _meta_idx
 from pyam.utils import IAMC_IDX, META_IDX
 
+from .conftest import TEST_DF
+
 df_filter_by_meta_matching_idx = pd.DataFrame(
     [
         ["model_a", "scen_a", "region_1", 1],
@@ -42,7 +44,7 @@
 
 
 @pytest.mark.parametrize("index", (None, META_IDX, ["model"]))
-def test_init_df(test_pd_df, index):
+def test_init_df_with_non_default_index(test_pd_df, index):
     """Casting to IamDataFrame and returning as `timeseries()` yields original frame"""
 
     # set a value to `nan` to check that timeseries columns are ordered correctly
@@ -54,6 +56,19 @@ def test_init_df(test_pd_df, index):
     pdt.assert_frame_equal(obs, test_pd_df.set_index(IAMC_IDX), check_column_type=False)
 
 
+def test_init_df_unsorted(test_pd_df):
+    """Initializing from unsorted timeseries data keeps the input order (no sort)"""
+
+    columns = IAMC_IDX + list(test_pd_df.columns[[6, 5]])
+    unsorted_data = test_pd_df.iloc[[2, 0, 1]][columns]
+    df = IamDataFrame(unsorted_data)
+
+    # `data` is not sorted: rows were passed as [2, 0, 1], last two columns swapped
+    assert list(df.data.scenario.unique()) == ["scen_b", "scen_a"]
+    assert list(df.data.year.unique()) == [2010, 2005]
+    assert not df._data.index.is_monotonic_increasing
+
+
 def test_init_from_iamdf(test_df_year):
     # casting an IamDataFrame instance again works
     df = IamDataFrame(test_df_year)
@@ -510,30 +525,56 @@ def test_variable_depth_with_list_raises(test_df, filter_name):
     pytest.raises(ValueError, test_df.filter, **{filter_name: [1, 2]})
 
 
-def test_timeseries(test_df):
-    dct = {
-        "model": ["model_a"] * 2,
-        "scenario": ["scen_a"] * 2,
-        "years": [2005, 2010],
-        "value": [1, 6],
-    }
-    exp = pd.DataFrame(dct).pivot_table(
-        index=["model", "scenario"], columns=["years"], values="value"
-    )
-    obs = test_df.filter(scenario="scen_a", variable="Primary Energy").timeseries()
-    npt.assert_array_equal(obs, exp)
+@pytest.mark.parametrize("unsort", [False, True])
+def test_timeseries(test_df, unsort):
+    """Assert that `timeseries()` matches the expected frame even if data is unsorted"""
+    exp = TEST_DF.set_index(IAMC_IDX)
+
+    if unsort:
+        # reverse row order of `data`; index and columns must be sorted anyway
+        data = test_df.data
+        if test_df.time_col == "time":
+            time = test_df.time
+            data.time = data.time.replace(
+                {year: time[i] for i, year in enumerate([2005, 2010])}
+            )
+        test_df = IamDataFrame(data.iloc[[5, 4, 3, 2, 1, 0]])
+        # check that `data` is not sorted internally
+        unsorted_data = test_df.data
+        assert list(unsorted_data.scenario.unique()) == ["scen_b", "scen_a"]
+        if test_df.time_col == "year":
+            time = unsorted_data.year.unique()
+        else:
+            time = unsorted_data.time.unique()
+        assert time[0] > time[1]
+
+    if test_df.time_col == "time":
+        exp.columns = test_df.time
+        exp.columns.name = None
+
+    obs = test_df.timeseries()
+    pdt.assert_frame_equal(obs, exp, check_column_type=False)
+
+
+def test_timeseries_wide_unsorted(test_pd_df):
+    """Wide-format input with year columns in reverse order yields sorted columns"""
+
+    # for some reason, `unstack` behaves differently if columns or rows are not sorted
+    exp = test_pd_df.set_index(IAMC_IDX)
+    obs = IamDataFrame(test_pd_df[IAMC_IDX + [2010, 2005]]).timeseries()
+    pdt.assert_frame_equal(obs, exp, check_column_type=False)
 
 
 def test_timeseries_empty_raises(test_df_year):
     """Calling `timeseries()` on an empty IamDataFrame raises"""
     _df = test_df_year.filter(model="foo")
-    with pytest.raises(ValueError, match="This IamDataFrame is empty!"):
+    with pytest.raises(ValueError, match="This IamDataFrame is empty."):
         _df.timeseries()
 
 
 def test_timeseries_time_iamc_raises(test_df_time):
     """Calling `timeseries(iamc_index=True)` on a continuous-time IamDataFrame raises"""
-    match = "Cannot use `iamc_index=True` with 'datetime' time-domain!"
+    match = "Cannot use `iamc_index=True` with 'datetime' time-domain."
     with pytest.raises(ValueError, match=match):
         test_df_time.timeseries(iamc_index=True)
 

diff --git a/tests/test_feature_aggregate.py b/tests/test_feature_aggregate.py
@@ -145,7 +145,7 @@ def test_aggregate_skip_intermediate(recursive_df):
     # make the data inconsistent, check (and then skip) validation
 
     recursive_df._data.iloc[0] = recursive_df._data.iloc[0] + 2
-    recursive_df._data.iloc[3] = recursive_df._data.iloc[3] + 2
+    recursive_df._data.iloc[2] = recursive_df._data.iloc[2] + 2
 
     # create object without variables to be aggregated, but with intermediate variables
     v = "Secondary Energy|Electricity"
@@ -324,7 +324,7 @@ def test_aggregate_region_with_negative_weights(simple_df, caplog):
 
     # dropping negative weights works as expected
     neg_weights_df = simple_df.copy()
-    neg_weights_df._data.iloc[18] = -6
+    neg_weights_df._data.iloc[26] = -6
     exp = simple_df.filter(variable=v, region="World", year=2010)
     assert_iamframe_equal(neg_weights_df.aggregate_region(v, weight=w), exp)
 

diff --git a/tests/test_feature_append_concat.py b/tests/test_feature_append_concat.py
@@ -98,8 +98,8 @@ def test_concat(test_df, reverse, iterable):
     assert test_df.scenario == ["scen_a", "scen_b"]
     assert other.scenario == ["scen_c"]
 
-    # assert that merging of meta works as expected (reorder columns)
-    pdt.assert_frame_equal(result.meta[EXP_META.columns], EXP_META)
+    # assert that merging of meta works as expected
+    pdt.assert_frame_equal(result.meta[EXP_META.columns], EXP_META, check_like=True)
 
     # assert that appending data works as expected
     ts = result.timeseries()
@@ -188,7 +188,31 @@ def test_concat_all_pd_dataframe(test_df):
     npt.assert_array_equal(ts.iloc[2].values, ts.iloc[3].values)
 
 
-def test_append(test_df):
+@pytest.mark.parametrize("inplace", (True, False))
+def test_append_data_not_sorted(test_pd_df, inplace):
+    """Appending timeseries data does not sort"""
+
+    columns = IAMC_IDX + list(test_pd_df.columns[[6, 5]])
+    unsorted_data = test_pd_df.iloc[[2, 1]][columns]
+    df = IamDataFrame(unsorted_data)
+
+    if inplace:
+        obs = df.copy()
+        obs.append(test_pd_df.iloc[[0]], inplace=True)
+    else:
+        obs = df.append(test_pd_df.iloc[[0]])
+        # assert that original object was not modified
+        assert len(df._data) == 4
+
+    # `data` is not sorted (pandas >= 2.2 only); compare (major, minor) numerically
+    # TODO remove this if-statement when dropping support for pandas < 2.2
+    if tuple(int(i) for i in pd.__version__.split(".")[:2]) >= (2, 2):
+        assert list(obs.data.scenario.unique()) == ["scen_b", "scen_a"]
+        assert list(obs.data.year.unique()) == [2010, 2005]
+        assert not obs._data.index.is_monotonic_increasing
+
+
+def test_append_meta(test_df):
     other = test_df.filter(scenario="scen_b").rename({"scenario": {"scen_b": "scen_c"}})
 
     test_df.set_meta([0, 1], name="col1")

diff --git a/tests/test_iiasa.py b/tests/test_iiasa.py
@@ -11,7 +11,7 @@
 
 from pyam import IamDataFrame, iiasa, lazy_read_iiasa, read_iiasa
 from pyam.testing import assert_iamframe_equal
-from pyam.utils import META_IDX
+from pyam.utils import IAMC_IDX, META_IDX
 
 from .conftest import IIASA_UNAVAILABLE, META_COLS, TEST_API, TEST_API_NAME
 
@@ -375,7 +375,9 @@ def test_lazy_read(tmpdir):
     assert df.model == ["model_a"]
     # This is read from the file, so the filter is not applied.
     df2 = lazy_read_iiasa(tmp_file, TEST_API)
-    assert df.data.equals(df2.data)
+    assert (
+        df.data.sort_values(by=IAMC_IDX).reset_index(drop=True).equals(df2.data)
+    )
     # If requesting with an inconsistent filter, get nothing back. Strings and filters
     # work interchangably.
     tmp_file = str(tmp_file)