Merge pull request #24 from RyanAugust/dev

Support multiple simulations
RyanAugust · Feb 24, 2024 · b1bc6c5 · b1bc6c5
2 parents e58e563 + d717ac2
commit b1bc6c5
Show file tree

Hide file tree

Showing 9 changed files with 267 additions and 100 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,7 @@
 <img align="left" src="PySiMMMulator_logo.png" alt="logo" width="150"/>
 
 # PySiMMMulator
+
 [![CodeFactor](https://www.codefactor.io/repository/github/ryanaugust/pysimmmulator/badge)](https://www.codefactor.io/repository/github/ryanaugust/pysimmmulator)
 [![PyPI Downloads](https://img.shields.io/pypi/dm/pysimmmulator.svg?label=PyPI%20downloads)](
 https://pypi.org/project/pysimmmulator/)
@@ -26,7 +27,7 @@ Run using this method, you'll be returned both a dataframe of for MMM input as w
 
 ```python
 cfg = load_config(config_path="./my_config.yaml")
-sim = simmmulate()
+sim = simmm()
 mmm_input_df, channel_roi = sim.run_with_config(config=cfg)
 ```
 
@@ -35,7 +36,7 @@ mmm_input_df, channel_roi = sim.run_with_config(config=cfg)
 Alternatively, you may run each of the stages independently, which allows for easier debugging and in-run adjustments based on the results of each stage. The order of the stages is shown below **(without their inputs)**. Once you've run through every stage, the results are available from `sim.final_df` (channel ROI results are stored as `sim.channel_roi`).
 
 ```python
-sim = simmmulate()
+sim = simmm()
 sim.simulate_baseline()
 sim.simulate_ad_spend()
 sim.simulate_media()
@@ -46,6 +47,7 @@ sim.consolidate_dataframe()
 sim.calculate_channel_roi()
 sim.finalize_output()
 ```
+
 ### Run via CLI
 
 A configuration file is required as input and should be passed as shown below. An output path can also be passed via `-o`; when it is omitted, the current working directory is used.

diff --git a/src/pysimmmulator/__init__.py b/src/pysimmmulator/__init__.py
@@ -8,5 +8,6 @@
 with open(os.path.join(os.path.dirname(__file__), "VERSION")) as version_file:
     __version__ = version_file.read().strip()
 
-from .simulate import simmmulate
-from .load_parameters import load_config, define_basic_params
+
+from .simulate import simmm, multisimmm
+from .load_parameters import load_config, define_basic_params
diff --git a/src/pysimmmulator/command_line.py b/src/pysimmmulator/command_line.py
@@ -1,4 +1,4 @@
-from pysimmmulator import load_config, simmmulate
+from pysimmmulator import load_config, simmm
 import pandas as pd
 import argparse
 import os
@@ -11,21 +11,30 @@ def run_with_config(config_path, output_path):
         config_path (str): path to a valid config file, see example_config.yaml as example of `simmmulator` expected config format
     """
     cfg = load_config(config_path)
-    sim = simmmulate()
+    sim = simmm()
     (mmm_input_df, channel_roi) = sim.run_with_config(config=cfg)
 
     # save to current directory. Should be an optional argument for this
-    mmm_input_df.to_csv(os.path.join(output_path,"mmm_input_df.csv"), index=False)
-    pd.DataFrame.from_dict(channel_roi, orient="index", columns=["true_roi"]).to_csv(os.path.join(output_path,"channel_roi.csv"))
+    mmm_input_df.to_csv(os.path.join(output_path, "mmm_input_df.csv"), index=False)
+    pd.DataFrame.from_dict(channel_roi, orient="index", columns=["true_roi"]).to_csv(
+        os.path.join(output_path, "channel_roi.csv")
+    )
 
 
 def main():
     arg_parser = argparse.ArgumentParser()
     arg_parser.add_argument(
-        "-i", "--input-config", action="store", help="Provides configuration file path for simulation"
+        "-i",
+        "--input-config",
+        action="store",
+        help="Provides configuration file path for simulation",
     )
     arg_parser.add_argument(
-        "-o", "--output_path", action="store", help="Provides output destination", default="."
+        "-o",
+        "--output_path",
+        action="store",
+        help="Provides output destination",
+        default=".",
     )
     args = arg_parser.parse_args()
     run_with_config(config_path=args.input_config, output_path=args.output_path)
diff --git a/src/pysimmmulator/load_parameters.py b/src/pysimmmulator/load_parameters.py
@@ -15,15 +15,16 @@ def load_config(config_path: str) -> dict:
         cfg = yaml.load(f, Loader=yaml.FullLoader)
     return cfg
 
+
 def define_basic_params(
-        years,
-        channels_clicks,
-        channels_impressions,
-        frequency_of_campaigns,
-        start_date,
-        true_cvr,
-        revenue_per_conv
-    ):
+    years,
+    channels_clicks,
+    channels_impressions,
+    frequency_of_campaigns,
+    start_date,
+    true_cvr,
+    revenue_per_conv,
+):
     "Takes in requirements for basic_params and loads with dataclass for validation as precursor"
     my_basic_params = basic_parameters(
         years=years,
@@ -32,7 +33,7 @@ def define_basic_params(
         frequency_of_campaigns=frequency_of_campaigns,
         start_date=start_date,
         true_cvr=true_cvr,
-        revenue_per_conv=revenue_per_conv
+        revenue_per_conv=revenue_per_conv,
     )
-    
+
     return my_basic_params
diff --git a/src/pysimmmulator/param_handlers.py b/src/pysimmmulator/param_handlers.py
@@ -15,7 +15,8 @@ class basic_parameters:
         frequency_of_campaigns (int): how often campaigns occur (for example, frequency of 1 would yield a new campaign every 1 day with each campaign lasting 1 day).
         start_date (str): format yyyy/mm/dd that determines when your daily data set starts on
         true_cvr (list): what the underlying conversion rates of all the channels are, statistical noise will be added on top of this.
-        revenue_per_conv (float): How much money we make from a conversion (i.e. profit from a unit of sale)."""
+        revenue_per_conv (float): How much money we make from a conversion (i.e. profit from a unit of sale).
+    """
 
     years: int
     channels_impressions: list[str]
@@ -32,7 +33,9 @@ def __post_init__(self):
         self.evaluate_params()
 
     def evaluate_params(self):
-        assert self.years > 0, "You entered less than 1 year. Must generate more than a years worth of data"
+        assert (
+            self.years > 0
+        ), "You entered less than 1 year. Must generate more than a years worth of data"
         if self.true_cvr is not None:
             assert len(self.true_cvr.keys()) == len(
                 self.all_channels
@@ -48,7 +51,11 @@ def evaluate_params(self):
     def __repr__(self):
         channel_use_impressions = ", ".join(self.channels_impressions)
         channel_use_clicks = ", ".join(self.channels_clicks)
-        cvr_values = ", ".join([str(cvr) for cvr in self.true_cvr]) if self.true_cvr is not None else ""
+        cvr_values = (
+            ", ".join([str(cvr) for cvr in self.true_cvr])
+            if self.true_cvr is not None
+            else ""
+        )
 
         slug = f"""Years of Data to generate : {self.years}
 Channel that use impressions : {channel_use_impressions}
@@ -107,20 +114,27 @@ class ad_spend_parameters:
     max_min_proportion_on_each_channel: dict
 
     def __post_init__(self):
-        assert self.campaign_spend_mean > 0, "You entered a negative average campaign spend. Enter a positive number."
+        assert (
+            self.campaign_spend_mean > 0
+        ), "You entered a negative average campaign spend. Enter a positive number."
         assert (
             self.campaign_spend_std < self.campaign_spend_mean
         ), "You've entered a campaign spend standard deviation larger than the mean."
         for k, v in self.max_min_proportion_on_each_channel.items():
-            assert 0 < v["min"] <= 1, "Min spend must be between 0 and 1 for each channel"
-            assert 0 < v["max"] <= 1, "Max spend must be between 0 and 1 for each channel"
+            assert (
+                0 < v["min"] <= 1
+            ), "Min spend must be between 0 and 1 for each channel"
+            assert (
+                0 < v["max"] <= 1
+            ), "Max spend must be between 0 and 1 for each channel"
 
     def check(self, basic_params: basic_parameters):
         """Validates ad_spend parameters against previously constructed basic
         parameter values.
 
         Args:
-            basic_params (basic_parameters): Previously submitted parameters as required by the simmmulate class"""
+            basic_params (basic_parameters): Previously submitted parameters as required by the simmmulate class
+        """
         assert len(self.max_min_proportion_on_each_channel.keys()) - 1 == len(
             basic_params.all_channels
         ), "You did not input in enough numbers or put in too many numbers for proportion of spends on each channel. Must have a maximum and minimum percentage specified for all channels except the last channel, which will be auto calculated as any remaining amount."
@@ -144,7 +158,9 @@ class media_parameters:
     noisy_cpm_cpc: dict
 
     def __post_init__(self):
-        self.true_cpmcpc_channels = list(self.true_cpm.keys()) + list(self.true_cpc.keys())
+        self.true_cpmcpc_channels = list(self.true_cpm.keys()) + list(
+            self.true_cpc.keys()
+        )
         self.noise_channels = list(self.noisy_cpm_cpc.keys())
 
     def check(self, basic_params: basic_parameters):
@@ -170,7 +186,8 @@ class cvr_parameters:
     from input to simmmulate, will provide validation checks.
 
     Args:
-        noisy_cpm_cpc (dict): Specifies the bias and scale of noise added to the true value CVR for each channel."""
+        noisy_cvr (dict): Specifies the bias and scale of noise added to the true value CVR for each channel.
+    """
 
     noisy_cvr: dict
 
@@ -179,8 +196,12 @@ def __post_init__(self):
 
         for channel in self.noisy_cvr.keys():
             channel_spec = self.noisy_cvr[channel]
-            assert isinstance(channel_spec["loc"], float), "noisy loc value must be of type float"
-            assert isinstance(channel_spec["scale"], float), "noisy scale value must be of type float"
+            assert isinstance(
+                channel_spec["loc"], float
+            ), "noisy loc value must be of type float"
+            assert isinstance(
+                channel_spec["scale"], float
+            ), "noisy scale value must be of type float"
 
     def check(self, basic_params: basic_parameters):
         assert sorted(self.noise_channels) == sorted(
@@ -209,13 +230,21 @@ def __post_init__(self):
             assert isinstance(value, float), "lambda decay value must be of type float"
             assert 0 <= value <= 1, "lambda decay value must be between 0 and 1"
         for channel, value in self.alpha_saturation.items():
-            assert isinstance(value, float), "alpha saturation value must be of type float"
+            assert isinstance(
+                value, float
+            ), "alpha saturation value must be of type float"
         for channel, value in self.gamma_saturation.items():
-            assert isinstance(value, float), "gamma saturation value must be of type float"
+            assert isinstance(
+                value, float
+            ), "gamma saturation value must be of type float"
             assert 0 <= value <= 1, "gamma saturation value must be between 0 and 1"
 
     def check(self, basic_params: basic_parameters):
-        for input_dict in [self.true_lambda_decay, self.alpha_saturation, self.gamma_saturation]:
+        for input_dict in [
+            self.true_lambda_decay,
+            self.alpha_saturation,
+            self.gamma_saturation,
+        ]:
             assert sorted(list(input_dict.keys())) == sorted(
                 basic_params.all_channels
             ), f"Channels declared within {input_dict.__name__} must be the same as original base channel input"