Skip to content

Commit

Permalink
Merge pull request #24 from RyanAugust/dev
Browse files Browse the repository at this point in the history
Support multiple simulations
  • Loading branch information
RyanAugust authored Feb 24, 2024
2 parents e58e563 + d717ac2 commit b1bc6c5
Show file tree
Hide file tree
Showing 9 changed files with 267 additions and 100 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<img align="left" src="PySiMMMulator_logo.png" alt="logo" width="150"/>

# PySiMMMulator

[![CodeFactor](https://www.codefactor.io/repository/github/ryanaugust/pysimmmulator/badge)](https://www.codefactor.io/repository/github/ryanaugust/pysimmmulator)
[![PyPI Downloads](https://img.shields.io/pypi/dm/pysimmmulator.svg?label=PyPI%20downloads)](
https://pypi.org/project/pysimmmulator/)
Expand All @@ -26,7 +27,7 @@ Run using this method, you'll be returned both a dataframe of for MMM input as w

```python
cfg = load_config(config_path="./my_config.yaml")
sim = simmmulate()
sim = simmm()
mmm_input_df, channel_roi = sim.run_with_config(config=cfg)
```

Expand All @@ -35,7 +36,7 @@ mmm_input_df, channel_roi = sim.run_with_config(config=cfg)
Alternatively, you may run each of the stages independently, which allows for easier debugging and in-run adjustments based on the results of each stage. The order of the stages is reflected below **(without their inputs)**. Once you've run through every stage, the results are available via the `sim.final_df` attribute (channel ROI results are stored as `sim.channel_roi`).

```python
sim = simmmulate()
sim = simmm()
sim.simulate_baseline()
sim.simulate_ad_spend()
sim.simulate_media()
Expand All @@ -46,6 +47,7 @@ sim.consolidate_dataframe()
sim.calculate_channel_roi()
sim.finalize_output()
```

### Run via CLI

A configuration file is required as input and should be passed as shown below. An output path can also be passed via `-o`; when it is omitted, the current working directory is used.
Expand Down
5 changes: 3 additions & 2 deletions src/pysimmmulator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@
with open(os.path.join(os.path.dirname(__file__), "VERSION")) as version_file:
__version__ = version_file.read().strip()

from .simulate import simmmulate
from .load_parameters import load_config, define_basic_params

from .simulate import simmm, multisimmm
from .load_parameters import load_config, define_basic_params
21 changes: 15 additions & 6 deletions src/pysimmmulator/command_line.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pysimmmulator import load_config, simmmulate
from pysimmmulator import load_config, simmm
import pandas as pd
import argparse
import os
Expand All @@ -11,21 +11,30 @@ def run_with_config(config_path, output_path):
config_path (str): path to a valid config file, see example_config.yaml as example of `simmmulator` expected config format
"""
cfg = load_config(config_path)
sim = simmmulate()
sim = simmm()
(mmm_input_df, channel_roi) = sim.run_with_config(config=cfg)

# save both outputs under output_path (the CLI defaults this to the current directory)
mmm_input_df.to_csv(os.path.join(output_path,"mmm_input_df.csv"), index=False)
pd.DataFrame.from_dict(channel_roi, orient="index", columns=["true_roi"]).to_csv(os.path.join(output_path,"channel_roi.csv"))
mmm_input_df.to_csv(os.path.join(output_path, "mmm_input_df.csv"), index=False)
pd.DataFrame.from_dict(channel_roi, orient="index", columns=["true_roi"]).to_csv(
os.path.join(output_path, "channel_roi.csv")
)


def main():
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument(
"-i", "--input-config", action="store", help="Provides configuration file path for simulation"
"-i",
"--input-config",
action="store",
help="Provides configuration file path for simulation",
)
arg_parser.add_argument(
"-o", "--output_path", action="store", help="Provides output destination", default="."
"-o",
"--output_path",
action="store",
help="Provides output destination",
default=".",
)
args = arg_parser.parse_args()
run_with_config(config_path=args.input_config, output_path=args.output_path)
21 changes: 11 additions & 10 deletions src/pysimmmulator/load_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,16 @@ def load_config(config_path: str) -> dict:
cfg = yaml.load(f, Loader=yaml.FullLoader)
return cfg


def define_basic_params(
years,
channels_clicks,
channels_impressions,
frequency_of_campaigns,
start_date,
true_cvr,
revenue_per_conv
):
years,
channels_clicks,
channels_impressions,
frequency_of_campaigns,
start_date,
true_cvr,
revenue_per_conv,
):
"Takes in requirements for basic_params and loads with dataclass for validation as precursor"
my_basic_params = basic_parameters(
years=years,
Expand All @@ -32,7 +33,7 @@ def define_basic_params(
frequency_of_campaigns=frequency_of_campaigns,
start_date=start_date,
true_cvr=true_cvr,
revenue_per_conv=revenue_per_conv
revenue_per_conv=revenue_per_conv,
)

return my_basic_params
57 changes: 43 additions & 14 deletions src/pysimmmulator/param_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ class basic_parameters:
frequency_of_campaigns (int): how often campaigns occur (for example, frequency of 1 would yield a new campaign every 1 day with each campaign lasting 1 day).
start_date (str): format yyyy/mm/dd that determines when your daily data set starts on
true_cvr (list): what the underlying conversion rates of all the channels are, statistical noise will be added on top of this.
revenue_per_conv (float): How much money we make from a conversion (i.e. profit from a unit of sale)."""
revenue_per_conv (float): How much money we make from a conversion (i.e. profit from a unit of sale).
"""

years: int
channels_impressions: list[str]
Expand All @@ -32,7 +33,9 @@ def __post_init__(self):
self.evaluate_params()

def evaluate_params(self):
assert self.years > 0, "You entered less than 1 year. Must generate more than a years worth of data"
assert (
self.years > 0
), "You entered less than 1 year. Must generate more than a years worth of data"
if self.true_cvr is not None:
assert len(self.true_cvr.keys()) == len(
self.all_channels
Expand All @@ -48,7 +51,11 @@ def evaluate_params(self):
def __repr__(self):
channel_use_impressions = ", ".join(self.channels_impressions)
channel_use_clicks = ", ".join(self.channels_clicks)
cvr_values = ", ".join([str(cvr) for cvr in self.true_cvr]) if self.true_cvr is not None else ""
cvr_values = (
", ".join([str(cvr) for cvr in self.true_cvr])
if self.true_cvr is not None
else ""
)

slug = f"""Years of Data to generate : {self.years}
Channel that use impressions : {channel_use_impressions}
Expand Down Expand Up @@ -107,20 +114,27 @@ class ad_spend_parameters:
max_min_proportion_on_each_channel: dict

def __post_init__(self):
assert self.campaign_spend_mean > 0, "You entered a negative average campaign spend. Enter a positive number."
assert (
self.campaign_spend_mean > 0
), "You entered a negative average campaign spend. Enter a positive number."
assert (
self.campaign_spend_std < self.campaign_spend_mean
), "You've entered a campaign spend standard deviation larger than the mean."
for k, v in self.max_min_proportion_on_each_channel.items():
assert 0 < v["min"] <= 1, "Min spend must be between 0 and 1 for each channel"
assert 0 < v["max"] <= 1, "Max spend must be between 0 and 1 for each channel"
assert (
0 < v["min"] <= 1
), "Min spend must be between 0 and 1 for each channel"
assert (
0 < v["max"] <= 1
), "Max spend must be between 0 and 1 for each channel"

def check(self, basic_params: basic_parameters):
"""Validates ad_spend parameters against previously constructed basic
parameter values.
Args:
basic_params (basic_parameters): Previously submitted parameters as required by the simmmulate class"""
basic_params (basic_parameters): Previously submitted parameters as required by the simmmulate class
"""
assert len(self.max_min_proportion_on_each_channel.keys()) - 1 == len(
basic_params.all_channels
), "You did not input in enough numbers or put in too many numbers for proportion of spends on each channel. Must have a maximum and minimum percentage specified for all channels except the last channel, which will be auto calculated as any remaining amount."
Expand All @@ -144,7 +158,9 @@ class media_parameters:
noisy_cpm_cpc: dict

def __post_init__(self):
self.true_cpmcpc_channels = list(self.true_cpm.keys()) + list(self.true_cpc.keys())
self.true_cpmcpc_channels = list(self.true_cpm.keys()) + list(
self.true_cpc.keys()
)
self.noise_channels = list(self.noisy_cpm_cpc.keys())

def check(self, basic_params: basic_parameters):
Expand All @@ -170,7 +186,8 @@ class cvr_parameters:
from input to simmmulate, will provide validation checks.
Args:
noisy_cpm_cpc (dict): Specifies the bias and scale of noise added to the true value CVR for each channel."""
noisy_cvr (dict): Specifies the bias and scale of noise added to the true value CVR for each channel.
"""

noisy_cvr: dict

Expand All @@ -179,8 +196,12 @@ def __post_init__(self):

for channel in self.noisy_cvr.keys():
channel_spec = self.noisy_cvr[channel]
assert isinstance(channel_spec["loc"], float), "noisy loc value must be of type float"
assert isinstance(channel_spec["scale"], float), "noisy scale value must be of type float"
assert isinstance(
channel_spec["loc"], float
), "noisy loc value must be of type float"
assert isinstance(
channel_spec["scale"], float
), "noisy scale value must be of type float"

def check(self, basic_params: basic_parameters):
assert sorted(self.noise_channels) == sorted(
Expand Down Expand Up @@ -209,13 +230,21 @@ def __post_init__(self):
assert isinstance(value, float), "lambda decay value must be of type float"
assert 0 <= value <= 1, "lambda decay value must be between 0 and 1"
for channel, value in self.alpha_saturation.items():
assert isinstance(value, float), "alpha saturation value must be of type float"
assert isinstance(
value, float
), "alpha saturation value must be of type float"
for channel, value in self.gamma_saturation.items():
assert isinstance(value, float), "gamma saturation value must be of type float"
assert isinstance(
value, float
), "gamma saturation value must be of type float"
assert 0 <= value <= 1, "gamma saturation value must be between 0 and 1"

def check(self, basic_params: basic_parameters):
for input_dict in [self.true_lambda_decay, self.alpha_saturation, self.gamma_saturation]:
for input_dict in [
self.true_lambda_decay,
self.alpha_saturation,
self.gamma_saturation,
]:
assert sorted(list(input_dict.keys())) == sorted(
basic_params.all_channels
), f"Channels declared within {input_dict.__name__} must be the same as original base channel input"
Expand Down
Loading

0 comments on commit b1bc6c5

Please sign in to comment.