Skip to content

Commit

Permalink
update pyaki
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul-B98 committed Jan 7, 2025
1 parent d04d9cc commit 398e489
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 76 deletions.
6 changes: 0 additions & 6 deletions pyAKI/__init__.py

This file was deleted.

Empty file added pyaki/__init__.py
Empty file.
22 changes: 13 additions & 9 deletions bin/process_aki_stages.py → pyaki/bin/process_aki_stages.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
#! /usr/bin/env python3
"""pyAKI CLI tool"""

import typer
"""pyaki CLI tool"""

from pathlib import Path

import pandas as pd
import typer

from pyAKI.kdigo import Analyser
from pyAKI.utils import Dataset, DatasetType
from pyaki.kdigo import Analyser
from pyaki.utils import Dataset, DatasetType


def main(
Expand All @@ -17,7 +16,7 @@ def main(
creatinine_file: str = "creatinine.csv",
rrt_file: str = "rrt.csv",
demographics_file: str = "demographics.csv",
):
) -> None:
"""
CLI tool to process AKI stages from time series data.
Expand Down Expand Up @@ -46,8 +45,8 @@ def main(
if (scr_file := root_dir / creatinine_file).is_file():
datasets.append(Dataset(DatasetType.CREATININE, pd.read_csv(scr_file)))

if (rrt_file := root_dir / rrt_file).is_file():
datasets.append(Dataset(DatasetType.RRT, pd.read_csv(rrt_file)))
if (_rrt_file := root_dir / rrt_file).is_file():
datasets.append(Dataset(DatasetType.RRT, pd.read_csv(_rrt_file)))

if (demo_file := root_dir / demographics_file).is_file():
datasets.append(Dataset(DatasetType.DEMOGRAPHICS, pd.read_csv(demo_file)))
Expand All @@ -56,5 +55,10 @@ def main(
ana.process_stays().to_csv(root_dir / "aki.csv")


if __name__ == "__main__":
def run() -> None:
"""Run the CLI tool"""
typer.run(main)


if __name__ == "__main__":
run()
46 changes: 17 additions & 29 deletions pyAKI/kdigo.py → pyaki/kdigo.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,24 @@
import logging

from typing import Optional

import pandas as pd

from pyAKI.probes import (
Probe,
UrineOutputProbe,
AbsoluteCreatinineProbe,
RelativeCreatinineProbe,
RRTProbe,
)
from pyAKI.preprocessors import (
Preprocessor,
TimeIndexCreator,
UrineOutputPreProcessor,
from pyaki.preprocessors import (
CreatininePreProcessor,
DemographicsPreProcessor,
Preprocessor,
RRTPreProcessor,
TimeIndexCreator,
UrineOutputPreProcessor,
)

from pyAKI.utils import Dataset
from pyaki.probes import (
AbsoluteCreatinineProbe,
Probe,
RelativeCreatinineProbe,
RRTProbe,
UrineOutputProbe,
)
from pyaki.utils import Dataset

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -74,19 +72,11 @@ def __init__(
]
if preprocessors is None: # apply default preprocessors if not provided
preprocessors = [
TimeIndexCreator(
stay_identifier=stay_identifier, time_identifier=time_identifier
),
UrineOutputPreProcessor(
stay_identifier=stay_identifier, time_identifier=time_identifier
),
CreatininePreProcessor(
stay_identifier=stay_identifier, time_identifier=time_identifier
),
TimeIndexCreator(stay_identifier=stay_identifier, time_identifier=time_identifier),
UrineOutputPreProcessor(stay_identifier=stay_identifier, time_identifier=time_identifier),
CreatininePreProcessor(stay_identifier=stay_identifier, time_identifier=time_identifier),
DemographicsPreProcessor(stay_identifier=stay_identifier),
RRTPreProcessor(
stay_identifier=stay_identifier, time_identifier=time_identifier
),
RRTPreProcessor(stay_identifier=stay_identifier, time_identifier=time_identifier),
]

# validate datasets
Expand Down Expand Up @@ -164,9 +154,7 @@ def process_stay(self, stay_id: str) -> pd.DataFrame:
if isinstance(_df, pd.Series):
_df = pd.DataFrame([_df], index=df.index)
columns = set(_df.columns) - set(df.columns)
df = df.merge(
_df[[*columns]], how="outer", left_index=True, right_index=True
)
df = df.merge(_df[[*columns]], how="outer", left_index=True, right_index=True)

df["stage"] = df.filter(like="stage").max(axis=1)
return df.set_index(
Expand Down
6 changes: 2 additions & 4 deletions pyAKI/preprocessors.py → pyaki/preprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,13 @@
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype

from pyAKI.utils import dataset_as_df, df_to_dataset, Dataset, DatasetType
from pyaki.utils import Dataset, DatasetType, dataset_as_df, df_to_dataset


class Preprocessor(ABC):
"""Abstract base class for preprocessors."""

def __init__(
self, stay_identifier: str = "stay_id", time_identifier: str = "charttime"
) -> None:
def __init__(self, stay_identifier: str = "stay_id", time_identifier: str = "charttime") -> None:
"""
Initialize a new instance of the Preprocessor class.
Expand Down
24 changes: 10 additions & 14 deletions pyAKI/probes.py → pyaki/probes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

from abc import ABC, ABCMeta
from enum import StrEnum, auto
from typing import Any

import pandas as pd
import numpy as np
import pandas as pd

import logging
from pyAKI.utils import dataset_as_df, df_to_dataset, approx_gte, Dataset, DatasetType
from pyaki.utils import Dataset, DatasetType, approx_gte, dataset_as_df, df_to_dataset


class Probe(ABC):
Expand Down Expand Up @@ -42,7 +42,7 @@ def probe(self, datasets: list[Dataset], **kwargs) -> pd.DataFrame:

RESNAME: str = "" # name of the column that will be added to the dataframe

def probe(self, datasets: list[Dataset], **kwargs) -> list[Dataset]:
def probe(self, datasets: list[Dataset], **kwargs: Any) -> list[Dataset]:
"""
Abstract method to be implemented by subclasses.
Expand Down Expand Up @@ -127,7 +127,7 @@ def probe(
self,
df: pd.DataFrame,
patient: pd.DataFrame,
**kwargs,
**kwargs: Any,
) -> pd.DataFrame:
"""
Perform urine output analysis on the provided DataFrame.
Expand Down Expand Up @@ -303,12 +303,10 @@ def creatinine_baseline(self, df: pd.DataFrame, patient: pd.DataFrame) -> pd.Ser
.ffill()[self._column]
)
min_value: pd.DatetimeIndex = values[
values.index
<= (values.index[0] + pd.Timedelta(self._baseline_timeframe))
values.index <= (values.index[0] + pd.Timedelta(self._baseline_timeframe))
].min() # calculate min value for first 7 days
values[
values.index
> (values.index[0] + pd.Timedelta(self._baseline_timeframe))
values.index > (values.index[0] + pd.Timedelta(self._baseline_timeframe))
] = min_value # set all values after first 7 days to min value

return values
Expand Down Expand Up @@ -436,7 +434,7 @@ def probe(
self,
df: pd.DataFrame,
patient: pd.DataFrame,
**kwargs,
**kwargs: Any,
) -> pd.DataFrame:
"""
Perform KDIGO stage calculation based on absolute creatinine elevations on the provided DataFrame.
Expand Down Expand Up @@ -480,7 +478,7 @@ class RelativeCreatinineProbe(AbstractCreatinineProbe):

@dataset_as_df(df=DatasetType.CREATININE, patient=DatasetType.DEMOGRAPHICS)
@df_to_dataset(DatasetType.CREATININE)
def probe(self, df: pd.DataFrame, patient: pd.DataFrame, **kwargs) -> pd.DataFrame:
def probe(self, df: pd.DataFrame, patient: pd.DataFrame, **kwargs: Any) -> pd.DataFrame:
"""
Perform calculation of relative creatinine elevations on the provided DataFrame.
Expand All @@ -498,9 +496,7 @@ def probe(self, df: pd.DataFrame, patient: pd.DataFrame, **kwargs) -> pd.DataFra
baseline_values: pd.Series = self.creatinine_baseline(df, patient)

df.loc[:, self.RESNAME] = 0
df.loc[
approx_gte((df[self._column] / baseline_values), 1.5), self.RESNAME
] = 1.0
df.loc[approx_gte((df[self._column] / baseline_values), 1.5), self.RESNAME] = 1.0
df.loc[approx_gte((df[self._column] / baseline_values), 2), self.RESNAME] = 2
df.loc[approx_gte((df[self._column] / baseline_values), 3), self.RESNAME] = 3

Expand Down
23 changes: 9 additions & 14 deletions pyAKI/utils.py → pyaki/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import logging

from typing import NamedTuple, cast
from enum import StrEnum, auto
from functools import wraps
from typing import Any, Callable, NamedTuple, cast

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -41,7 +40,7 @@ class Dataset(NamedTuple):
df: pd.DataFrame


def dataset_as_df(**mapping: DatasetType):
def dataset_as_df(**mapping: DatasetType) -> Callable:
"""
Decorator factory for methods that process datasets with dataframes.
Expand Down Expand Up @@ -82,14 +81,12 @@ def process_data(self, data: pd.DataFrame, labels: pd.DataFrame):

# in_mapping: Dict[DatasetType, str] = {v: k for k, v in mapping.items()}

def decorator(func):
def decorator(func: Callable) -> Callable:
@wraps(func)
def wrapper(self, datasets: list[Dataset], *args, **kwargs) -> list[Dataset]:
def wrapper(self: Any, datasets: list[Dataset], *args: Any, **kwargs: Any) -> list[Dataset]:
# map the dataset types to corresponding DataFrames
_mapping: dict[str, pd.DataFrame] = {
in_mapping[dtype]: df
for dtype, df in datasets
if dtype in in_mapping.keys()
in_mapping[dtype]: df for dtype, df in datasets if dtype in in_mapping.keys()
}
# check if all datasets are mapped, otherwise return the original datasets
if len(in_mapping) != len(_mapping):
Expand All @@ -103,16 +100,14 @@ def wrapper(self, datasets: list[Dataset], *args, **kwargs) -> list[Dataset]:
_dtype, _df = func(self, *args, **_mapping, **kwargs)

# return the updated datasets
return [
Dataset(dtype, _df if dtype == _dtype else df) for dtype, df in datasets
]
return [Dataset(dtype, _df if dtype == _dtype else df) for dtype, df in datasets]

return wrapper

return decorator


def df_to_dataset(dtype: DatasetType):
def df_to_dataset(dtype: DatasetType) -> Callable:
"""
Decorator that converts a DataFrame into a dataset with the specified type.
Expand All @@ -135,9 +130,9 @@ def process_dataframe(self, *args: list, **kwargs: dict) -> pd.DataFrame:
...
"""

def decorator(func):
def decorator(func: Callable) -> Callable:
@wraps(func)
def wrapper(self, *args: list, **kwargs: dict) -> Dataset:
def wrapper(self: Any, *args: Any, **kwargs: Any) -> Dataset:
return Dataset(dtype, func(self, *args, **kwargs))

return wrapper
Expand Down

0 comments on commit 398e489

Please sign in to comment.