Skip to content

Commit

Permalink
Merge pull request #155 from crowdcent/feature/era-col-from-dates
Browse files Browse the repository at this point in the history
Feature/era col from dates
  • Loading branch information
CarloLepelaars authored Dec 20, 2023
2 parents fe47fb7 + 7c6fe44 commit 8b2633f
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 54 deletions.
7 changes: 7 additions & 0 deletions numerblox/numerframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,13 @@ def get_dates_from_era_col(self) -> pd.Series:
"Era col is not 'era'. Please make sure to have a valid 'era' column to use for converting to dates."
return self[self.meta.era_col].astype(int).apply(self.get_date_from_era)

@property
def get_eras_from_date_col(self) -> pd.Series:
    """
    Column of all eras, derived from the date column.

    The frame's era column (``self.meta.era_col``) must be named ``'date'``
    or ``'friday_date'``; each of its values is passed through
    ``self.get_era_from_date``.

    :return: Series holding one converted value per row of the date column.
    :raises AssertionError: If the era column is neither 'date' nor 'friday_date'.
    """
    # Kept as an assert to mirror the check style of get_dates_from_era_col.
    assert self.meta.era_col in ("date", "friday_date"), \
        "Era col is not 'date' or 'friday_date'. Please make sure to have a valid 'date' or 'friday_date' column to use for converting to eras."
    return self[self.meta.era_col].apply(self.get_era_from_date)

def get_era_range(self, start_era: int, end_era: int) -> "NumerFrame":
"""
Get all eras between two era numbers.
Expand Down
8 changes: 8 additions & 0 deletions numerblox/preprocessing/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,10 @@ def transform(
output_df[f"{feature}_quantile{self.num_quantiles}"] = group_data
return output_df.to_numpy()

def fit_transform(self, X: Union[np.array, pd.DataFrame], eras: pd.Series):
    """
    Fit on the given data and eras, then transform the same data.

    :param X: Input data, forwarded unchanged to both fit and transform.
    :param eras: Era values, forwarded unchanged to both fit and transform.
    :return: Whatever ``self.transform`` returns for these inputs.
    """
    # Fit runs first; transform then receives the exact same keyword arguments.
    self.fit(X=X, eras=eras)
    transformed = self.transform(X=X, eras=eras)
    return transformed

def get_feature_names_out(self, input_features=None) -> List[str]:
"""Return feature names."""
if not input_features:
Expand Down Expand Up @@ -374,6 +378,10 @@ def transform(self, X: Union[np.array, pd.DataFrame], tickers: pd.Series) -> np.
self.output_features = output_features
return X[output_features].to_numpy()

def fit_transform(self, X: Union[np.array, pd.DataFrame], tickers: pd.Series):
    """
    Fit on the given data and tickers, then transform the same data.

    :param X: Input data, forwarded unchanged to both fit and transform.
    :param tickers: Ticker values, forwarded unchanged to both fit and transform.
    :return: Whatever ``self.transform`` returns for these inputs.
    """
    # Fit runs first; transform then receives the exact same keyword arguments.
    self.fit(X=X, tickers=tickers)
    transformed = self.transform(X=X, tickers=tickers)
    return transformed

def get_feature_names_out(self, input_features=None) -> List[str]:
    """
    Return feature names.

    :param input_features: Optional names; when truthy they are returned as given.
    :return: ``input_features`` if truthy, otherwise ``self.output_features``.
    """
    # Caller-supplied names win; the stored attribute is only read as a fallback.
    if input_features:
        return input_features
    return self.output_features
Expand Down
106 changes: 53 additions & 53 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "numerblox"
version = "1.1.10"
version = "1.1.11"
description = "Solid Numerai Pipelines"
authors = ["CrowdCent <[email protected]>"]
license = "MIT License"
Expand Down
10 changes: 10 additions & 0 deletions tests/test_numerframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,16 @@ def test_get_dates_from_era_col():
assert all(result.index == nf.index[:5])
assert result.tolist() == [pd.Timestamp(ERA_ONE_START)] * len(result)

def test_get_eras_from_date_col():
    """Weekly dates starting at ERA_ONE_START should convert to eras 1, 2, 3, ..."""
    frame = dataset.copy()
    first_day = pd.Timestamp(ERA_ONE_START)
    # One date per row, each exactly a week after the previous one.
    frame['date'] = [first_day + pd.Timedelta(days=7 * week) for week in range(len(frame))]
    frame = frame.drop(columns="era")
    nf = NumerFrame(frame.iloc[:5])
    result = nf.get_eras_from_date_col
    assert isinstance(result, pd.Series)
    assert all(result.index == nf.index[:5])
    expected_eras = list(range(1, len(result) + 1))
    assert result.tolist() == expected_eras

def test_get_era_range():
nf = NumerFrame(dataset)
result = nf.get_era_range(start_era=1, end_era=3)
Expand Down

0 comments on commit 8b2633f

Please sign in to comment.