diff --git a/momaland/learning/morl/random_centralised_agent_example.py b/momaland/learning/morl/random_centralised_agent_example.py
index 438ba8e..154c954 100644
--- a/momaland/learning/morl/random_centralised_agent_example.py
+++ b/momaland/learning/morl/random_centralised_agent_example.py
@@ -44,7 +44,7 @@ def train_random(moma_env):
         num_timesteps=50,
         initial_map=test_map,
         randomise=True,
-        reward_mode="test",
+        reward_mode="individual",
         render_mode=None,
     )
@@ -55,11 +55,11 @@ def train_random(moma_env):
         type_distribution=[0.5, 0.5],
         position_distribution=[0.5, 1],
         num_timesteps=10,
-        reward_scheme="local",
+        reward_mode="individual",
     )
     train_random(ig_env)
-    # train_random(mobpd_env)
+    train_random(mobpd_env)
     # train_sa_random(ig_env)
     # train_sa_random(mobpd_env)
diff --git a/momaland/learning/morl/sa_env_factory.py b/momaland/learning/morl/sa_env_factory.py
index 6fa48f1..74e5a3b 100644
--- a/momaland/learning/morl/sa_env_factory.py
+++ b/momaland/learning/morl/sa_env_factory.py
@@ -34,7 +34,7 @@ def make_single_agent_bpd_env(size="small"):
     bpd_env = mobeach_v0.parallel_env(
         num_timesteps=5,
         num_agents=10,
-        reward_scheme="global",
+        reward_mode="team",
         sections=3,
         capacity=2,
         type_distribution=(0.7, 0.3),
@@ -44,7 +44,7 @@
     bpd_env = moitem_gathering_v0.parallel_env(
         num_timesteps=1,
         num_agents=50,
-        reward_scheme="global",
+        reward_mode="team",
         sections=5,
         capacity=3,
         type_distribution=(0.7, 0.3),
diff --git a/momaland/learning/morl/train_ig_GPILS.py b/momaland/learning/morl/train_ig_GPILS.py
index 99a3faa..cde8175 100644
--- a/momaland/learning/morl/train_ig_GPILS.py
+++ b/momaland/learning/morl/train_ig_GPILS.py
@@ -43,7 +43,7 @@
     gradient_updates=10,
     target_net_update_freq=200,
     tau=1,
-    log=False,  # set this to True to turn on wandb logging
+    log=True,
     project_name=project_name,
     seed=seed,
 )
diff --git a/momaland/learning/morl/train_ig_PCN.py b/momaland/learning/morl/train_ig_PCN.py
index ac724f8..de0f538 100644
--- a/momaland/learning/morl/train_ig_PCN.py
+++ b/momaland/learning/morl/train_ig_PCN.py
@@ -40,7 +40,7 @@
     batch_size=256,
     project_name=project_name,
     experiment_name="PCN",
-    log=False,  # set this to True to turn on wandb logging
+    log=True,
 )
 timesteps_per_iter = 10000
 agent.train(
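
For reference, a minimal sketch of the renamed keyword in use: the patch replaces `reward_scheme` with `reward_mode` and remaps the values `"local"`/`"global"` to `"individual"`/`"team"`. The import path and the `reset` call below are assumptions based on MOMAland's PettingZoo-style parallel API; they are not part of this patch.

```python
# Sketch only: the import path is an assumption from MOMAland's package
# layout; the constructor arguments mirror the values shown in the diff.
from momaland.envs.beach import mobeach_v0

env = mobeach_v0.parallel_env(
    num_timesteps=5,
    num_agents=10,
    reward_mode="team",  # previously reward_scheme="global"
    sections=3,
    capacity=2,
    type_distribution=(0.7, 0.3),
)
# Standard PettingZoo parallel API (assumed, not shown in the patch).
observations, infos = env.reset(seed=42)
```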