
Commit

Merge pull request #85 from AnyBody-Research-Group/to_dataframe2
To dataframe2
melund authored Aug 12, 2021
2 parents 20bde77 + 604158d commit a220946
Showing 12 changed files with 1,620 additions and 1,497 deletions.
28 changes: 27 additions & 1 deletion CHANGELOG.rst
@@ -2,6 +2,32 @@
AnyPyTools Change Log
=====================


v1.6.0
=============

**Changed**:

* The `to_dataframe()` methods have been updated.
  They now return a dataframe without an index by default.

  They also now support interpolation of the data:

  .. code-block:: python

     app = AnyPyProcess()
     results = app.start_macro(macro_list)
     df = results.to_dataframe(
         interp_var="Main.MyStudy.Output.Abscissa.t",
         interp_val=np.linspace(0, 1, 50),
     )

**Added**:

* Documentation on how to use the `to_dataframe()` method has been added to the tutorials.


v1.5.0
=============
Add methods for exporting simulation output as a pandas dataframe.
@@ -550,4 +576,4 @@ v0.8.0
<v0.8
=============
The before times... See GitHub for a full
[Full Changelog](https://github.com/AnyBody-Research-Group/AnyPyTools/compare/0.1...0.8.0)
2 changes: 1 addition & 1 deletion anypytools/__init__.py
@@ -37,7 +37,7 @@
"NORMAL_PRIORITY_CLASS",
]

__version__ = "1.5.0"
__version__ = "1.6.0"


def print_versions():
152 changes: 82 additions & 70 deletions anypytools/tools.py
@@ -380,7 +380,7 @@ def tolist(self):
for elem in self
]

def to_dataframe(self, index_var="auto", group_var=None):
def to_dataframe(self, index_var="auto", **kwargs):
"""Return output of all simuations as a concatenated pandas dataframe.
Parameters:
@@ -389,11 +389,13 @@ def to_dataframe(self, index_var="auto", group_var=None):
Name of the variable to use as axis 0 in the dataframe.
If not given, the system will look for variables ending with
"Output.Abscissa.t"
group_var: str
Name of the variable which will be different across all
simulations. If not specified, the index of the simulation will
be used, and a categorical 'group' column will be added to the
dataframe.
interp_var: str
Name of the variable on which the data is interpolated/resampled.
interp_val: np.ndarray
Values to use when re-interpolating/resampling the data.
interp_method: str
Method to use when re-interpolating/resampling the data. Defaults to 'cubic'.
Returns:
--------
@@ -404,32 +406,11 @@
except ImportError:
raise ImportError("pandas is required for this function")

dfs = []
index_name = None
for idx, elem in enumerate(self):
df = elem.to_dataframe(index_var)

if index_name and df.index.name != index_name:
raise ValueError(
"The index of the dataframe is not consistant across all elements of the output. "
)
else:
index_name = df.index.name

if group_var is not None:
group = group_var
if group_var not in df.columns:
raise KeyError(
f"The group variable {group_var} is not available element {idx}"
)
else:
group = "group"
df[group] = idx
dfs.append(df)
df_out = pd.concat(dfs, ignore_index=True, sort=False)
df_out[group] = pd.Categorical(df_out[group])

return df_out
dfs = [elem.to_dataframe(index_var, **kwargs) for elem in self]
dfout = pd.concat(dfs, keys=range(len(dfs)), sort=False)
if "task_id" in dfout.columns:
dfout["task_id"] = pd.Categorical(dfout.task_id, ordered=True)
return dfout
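
# Editor's note: an illustrative sketch (not part of this commit) of how
# `pd.concat` with `keys=` stacks the per-simulation dataframes, as used in
# `to_dataframe` above. The column names below are hypothetical placeholders,
# not actual AnyBody output variables.
import pandas as pd

dfs = [
    pd.DataFrame({"t": [0.0, 0.5, 1.0], "force": [1.0, 2.0, 3.0]}),
    pd.DataFrame({"t": [0.0, 0.5, 1.0], "force": [1.5, 2.5, 3.5]}),
]
combined = pd.concat(dfs, keys=range(len(dfs)), sort=False)
# `combined` now has a two-level row index: level 0 is the simulation number
# (0, 1, ...) and level 1 is the original row number within each simulation.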


def _expand_short_path_name(short_path_name):
@@ -602,31 +583,10 @@ def __repr__(self, _repr_running={}, prefix=""):
finally:
del _repr_running[call_key]

def to_dataframe(self, index_var: Optional[str] = "auto"):
"""Convert the output to a pandas dataframe.
Parameters:
-----------
index_var: str
Name of the variable to use as axis 0 in the dataframe.
If "auto" is given the system will look for variables ending with
"Ouput.Abcsissa.t". If 'None' no index variable is used an only a single
row is returned in the dataframe.
Returns:
--------
df: pandas.DataFrame
Dataframe with the output data.
def _get_index_length(self, index_var="auto"):
"""Find the length of the index_var variable or look for a
time dimension in the data set if index_var="auto"
"""
try:
import pandas as pd
except ImportError:
raise ImportError("pandas is required for this function")

excluded_vars = ["task_macro"]

var_list = set(self.keys()) - set(excluded_vars)

if index_var == "auto":
timevars = [var for var in self if var.endswith("Output.Abscissa.t")]
if len(timevars) > 1:
@@ -649,23 +609,57 @@ def to_dataframe(self, index_var: Optional[str] = "auto"):
raise ValueError(f"The index var {index_var} should be a 1D array")

index_len = index_data.shape[0]
df_output = pd.DataFrame({index_var: self[index_var]})
# columns = [abscissa]
var_list -= set([index_var])
else:
index_len = 1
df_output = pd.DataFrame()
# columns = []
return index_len

def to_dataframe(
self,
index_var: Optional[str] = "auto",
interp_var=None,
interp_val=None,
interp_method="cubic",
):
"""Convert the output to a pandas dataframe.
Parameters:
-----------
index_var: str
Name of the variable to use as axis 0 in the dataframe.
If "auto" is given the system will look for variables ending with
"Ouput.Abcsissa.t". If 'None' no index variable is used an only a single
row is returned in the dataframe.
interp_var: str
Name of the variable on which the data is interpolated/resampled.
interp_val: np.ndarray
Values to use when re-interpolating/resampling the data.
interp_method: str
Method to use when re-interpolating/resampling the data. Defaults to 'cubic'.
Returns:
--------
df: pandas.DataFrame
Dataframe with the output data.
"""
try:
import pandas as pd
except ImportError:
raise ImportError("pandas is required for this function")

excluded_vars = ["task_macro"]

var_list = set(self.keys()) - set(excluded_vars)

index_len = self._get_index_length(index_var)

dfs = []
for var in var_list:
data = self[var]
# col_names = [var]
if isinstance(data, (int, float, str)):
data = np.array(data)
if isinstance(data, np.ndarray):
indices = np.array(list(np.ndindex(data.shape)))
data = np.atleast_2d(data.T).T
# if np.issubdtype(data.dtype, np.number):
if data.shape[0] != index_len:
data = data.flatten()
data = np.repeat(data[np.newaxis, :], index_len, axis=0)
@@ -678,15 +672,33 @@ def to_dataframe(self, index_var: Optional[str] = "auto"):
col_names = [
var + "".join(f"[{i}]" for i in index) for index in indices
]
df_output = pd.concat(
[df_output, pd.DataFrame(data, columns=col_names)], axis=1
)
dfs.append(pd.DataFrame(data, columns=col_names))

df_output = df_output.convert_dtypes()
if index_var:
df_output.set_index(index_var, inplace=True)
dfout = pd.concat(dfs, axis=1).convert_dtypes(
convert_integer=False, convert_floating=False
)

return df_output
if interp_var is not None:
if interp_var not in dfout.columns:
raise ValueError(
f"The `interp_var` {interp_var} could not be found in the data"
)
if interp_val is None:
interp_val = dfout[interp_var]
time_columns = list(dfout.columns[dfout.apply(pd.Series.nunique) > 1])
constant_columns = dfout.columns.difference(time_columns)
if interp_var not in time_columns:
raise ValueError("The `interp_var` should not be a constant")

dfout = dfout.set_index(interp_var, drop=True)
dfout = dfout.reindex(dfout.index.union(interp_val))
dfout[time_columns] = dfout[time_columns].interpolate(interp_method)
dfout[constant_columns] = dfout[constant_columns].fillna(method="bfill")

dfout = dfout.loc[interp_val]
dfout.reset_index(inplace=True)

return dfout
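
# Editor's note: an illustrative, self-contained sketch (not part of this
# commit) of the resampling approach used above: index by the interpolation
# variable, extend the index with the new sample points, interpolate across
# the gaps, then keep only the new sample points. The dataframe and column
# names are hypothetical; "index" (linear) interpolation is used here instead
# of the default "cubic" to avoid the scipy dependency.
import numpy as np
import pandas as pd

df = pd.DataFrame({"t": [0.0, 0.4, 1.0], "signal": [0.0, 1.0, 0.0]})
new_t = np.linspace(0.0, 1.0, 5)

resampled = df.set_index("t")
full_index = resampled.index.union(pd.Index(new_t, name="t"))
resampled = resampled.reindex(full_index)
resampled["signal"] = resampled["signal"].interpolate("index")
resampled = resampled.loc[new_t].reset_index()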


def _recursive_replace(iterable, old, new):
20 changes: 8 additions & 12 deletions docs/Tutorial/00_AnyPyTools_tutorial.ipynb
@@ -24,21 +24,14 @@
"See the [full documentation](https://anybody-research-group.github.io/anypytools-docs/) for more information.\n",
"\n",
"\n",
"<img src=\"Tutorial files/relax.png\" alt=\"Don't panic\" align=\"left\" style=\"height: 150px;\"/>\n"
"<img src=\"Tutorial_files/relax.png\" alt=\"Don't panic\" align=\"left\" style=\"height: 150px;\"/>\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -52,14 +45,17 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
"version": "3.9.2"
},
"nbsphinx": {
"orphan": true
},
"widgets": {
"state": {},
"version": "1.1.2"
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
25 changes: 8 additions & 17 deletions docs/Tutorial/00_Install_and_setup.ipynb
@@ -34,25 +34,13 @@
"> conda update anypytools\n",
"> ```\n",
"\n",
"<img src=\"Tutorial files/relax.png\" alt=\"\" align=\"left\" style=\"height: 150px;\"/>"
"<img src=\"Tutorial_files/relax.png\" alt=\"\" align=\"left\" style=\"height: 150px;\"/>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -66,14 +54,17 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
"version": "3.9.2"
},
"nbsphinx": {
"orphan": true
},
"widgets": {
"state": {},
"version": "1.1.2"
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,