generated from aequitas-aod/template-python-project-poetry
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Liam James
committed
Mar 6, 2024
1 parent
2deff1c
commit 7f2ce12
Showing
5 changed files
with
8 additions
and
339 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
scikit-learn==1.4.1.post1 | ||
numpy==1.26.4 | ||
aif360==0.6.0 | ||
pandas==2.2.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,52 +1,10 @@ | ||
import numpy as np | ||
import numpy.random as random | ||
import typing | ||
import pandas as pd | ||
|
||
|
||
# Seed NumPy's global random state at import time so that every generator in this module produces the same data on each run. | ||
SEED = 1 | ||
random.seed(SEED) | ||
|
||
|
||
def uniform_binary_dataset(rows: int = 1000, columns: int = 2) -> np.ndarray:
    """Return a ``rows`` x ``columns`` integer array of random 0/1 values.

    Every entry is a sample from ``uniform(0, 1)`` rounded to the nearest
    integer and cast to ``int``, so each entry is either 0 or 1.  Samples are
    taken from the module-level ``numpy.random`` state.

    Args:
        rows: Number of rows in the returned array.
        columns: Number of columns in the returned array.

    Returns:
        A 2-D integer array of shape ``(rows, columns)``.
    """
    samples = random.uniform(0, 1, size=(rows, columns))
    return samples.round().astype(int)
|
||
|
||
def uniform_binary_dataset_gt(rows: int = 1000, columns: int = 2) -> np.ndarray:
    """Build a random binary dataset with a label column and a noisy copy of it.

    The returned array is laid out, left to right, as
    ``[extra columns..., xs, labels, preds]`` where

    * ``xs`` and ``labels`` are independent uniform 0/1 columns,
    * ``preds`` equals ``labels`` with each entry flipped with probability 0.2,
    * ``columns - 2`` additional uniform 0/1 columns are prepended when
      ``columns > 2``.

    Note that the result therefore has ``max(columns, 2) + 1`` columns.

    Args:
        rows: Number of rows to generate.
        columns: Controls how many extra uniform columns are prepended.

    Returns:
        A 2-D integer array with ``rows`` rows.
    """
    xs = uniform_binary_dataset(rows, 1)
    labels = uniform_binary_dataset(rows, 1)
    # noise == 1 (probability 0.2) inverts the label: |l - 1| flips a bit,
    # while |l - 0| leaves it untouched.
    noise = random.choice([0, 1], p=[0.8, 0.2], size=(rows, 1))
    preds = abs(labels - noise)
    # Extra columns are sampled last, exactly as before, so output under a
    # fixed seed does not change.  The list is empty when columns <= 2.
    extras = [uniform_binary_dataset(rows, 1) for _ in range(columns - 2)]
    return np.concatenate([*extras, xs, labels, preds], axis=1)
|
||
def skewed_binary_dataset_gt(
    rows: int = 1000, columns: int = 2, p: float = 0.8
) -> np.ndarray:
    """Build a random binary dataset whose last column is skewed towards ``xs``.

    The returned array is laid out, left to right, as
    ``[extra columns..., xs, labels, preds]`` where

    * ``xs`` and ``labels`` are independent uniform 0/1 columns,
    * ``preds`` is ``xs`` multiplied element-wise by a Bernoulli(``p``) draw,
      so it can only be 1 where ``xs`` is 1,
    * ``columns - 2`` additional uniform 0/1 columns are prepended when
      ``columns > 2``.

    Note that the result therefore has ``max(columns, 2) + 1`` columns.

    Args:
        rows: Number of rows to generate.
        columns: Controls how many extra uniform columns are prepended.
        p: Success probability of the Bernoulli mask; must lie in [0, 1].

    Returns:
        A 2-D integer array with ``rows`` rows.
    """
    xs = uniform_binary_dataset(rows, 1)
    # One Bernoulli draw per row, taken in row order and before ``labels`` is
    # sampled (the original sampling order).  Drawing the whole column at once
    # avoids a Python-level loop and keeps the (rows, 1) shape when rows == 0.
    preds = bernoulli(p, size=(rows, 1)) * xs
    labels = uniform_binary_dataset(rows, 1)
    # The list is empty when columns <= 2.
    extras = [uniform_binary_dataset(rows, 1) for _ in range(columns - 2)]
    return np.concatenate([*extras, xs, labels, preds], axis=1)
|
||
def bernoulli(p: float, size: typing.Tuple[int, ...] = (1,)) -> np.ndarray:
    """Draw independent Bernoulli(``p``) samples as an integer array.

    A sample is 1 when a ``uniform(0, 1)`` draw falls strictly below ``p`` and
    0 otherwise.

    Args:
        p: Probability of drawing a 1; must lie in [0, 1].
        size: Shape of the returned array.

    Returns:
        An integer array of shape ``size`` containing only 0s and 1s.

    Raises:
        ValueError: If ``p`` is outside [0, 1] (including NaN).
    """
    # Raise explicitly instead of asserting: assertions are removed when
    # Python runs with -O, which would let invalid probabilities through.
    if not 0 <= p <= 1:
        raise ValueError("p must be in [0, 1]")
    return (random.uniform(0, 1, size=size) < p).astype(int)
|
||
|
||
def skewed_binary_dataset(
    rows: int = 1000, columns: int = 2, p: float = 0.8
) -> np.ndarray:
    """Build a binary dataset whose later columns are skewed towards the first.

    The first column ``xs`` is a uniform 0/1 column.  Each of the remaining
    ``columns - 1`` columns is ``xs`` multiplied element-wise by a fresh
    Bernoulli(``p``) draw, so those columns can only be 1 where ``xs`` is 1.

    Args:
        rows: Number of rows to generate.
        columns: Total number of columns in the result (``xs`` included).
        p: Success probability of the Bernoulli mask; must lie in [0, 1].

    Returns:
        A 2-D integer array with ``rows`` rows.
    """
    xs = uniform_binary_dataset(rows, 1)
    # One whole-column Bernoulli draw per derived column replaces the per-row
    # Python loop; it samples in the same row order and keeps the (rows, 1)
    # shape even when rows == 0, so the concatenation below cannot fail on a
    # dimension mismatch.
    cols = [bernoulli(p, size=(rows, 1)) * xs for _ in range(columns - 1)]
    return np.concatenate([xs, *cols], axis=1)
def binary_label_dataset(rows: int = 1000) -> pd.DataFrame:
    """Return a random dataset with one continuous feature and a binary label.

    Args:
        rows: Number of rows to generate.

    Returns:
        A DataFrame with columns ``'feat'`` (samples from ``uniform(0, 1)``)
        and ``'label'`` (0 or 1).  Both columns are stored as floats because
        they are concatenated into a single array before the frame is built.
    """
    features = random.uniform(0, 1, size=(rows, 1))
    # Round before casting: uniform(0, 1) samples lie in [0, 1), so a bare
    # astype(int) truncates every one of them to 0 and no label is ever 1.
    labels = random.uniform(0, 1, size=(rows, 1)).round().astype(int)
    data = np.concatenate([features, labels], axis=1)
    return pd.DataFrame(data, columns=['feat', 'label'])
This file was deleted.
Oops, something went wrong.