diff --git a/rl/boltzmann_money/README.md b/rl/boltzmann_money/README.md
new file mode 100644
index 00000000..195abe39
--- /dev/null
+++ b/rl/boltzmann_money/README.md
@@ -0,0 +1,15 @@
+# Balancing Wealth Inequality
+This folder showcases how to solve the Boltzmann wealth model with Proximal Policy Optimization (PPO) from Stable-Baselines3.
+
+## Key features:
+
+- Boltzmann Wealth Model: Agents with varying wealth navigate a grid, aiming to minimize inequality measured by the Gini coefficient.
+- PPO Training: A PPO agent is trained to achieve this goal, receiving sparse rewards based on Gini coefficient improvement and a large terminal reward for achieving low inequality.
+- Mesa Data Collection and Visualization: The Mesa data collector tool tracks Gini values during training, allowing for real-time visualization.
+- Visualization Script: Visualize the trained agent's behavior with Mesa's visualization tools, showing agent movement and Gini values on the grid. You can run the `server.py` file to test it with a pre-trained model.
+
+## Model Behaviour
+As Stable-Baselines3 controls all agents with the same policy weights, the agents learn to move towards a corner of the grid. This brings all the agents together, allowing money to be exchanged between them and maximizing the reward.
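+
+For reference, a minimal sketch of the training loop, equivalent to running `train.py` with its default arguments (run it from this folder so that `model.py` is importable):
+
+```python
+from stable_baselines3 import PPO
+
+from model import NUM_AGENTS, BoltzmannWealthModelRL
+
+# The Mesa model doubles as a gymnasium.Env, so it can be passed to PPO directly
+env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
+
+model = PPO("MlpPolicy", env, verbose=1)
+model.learn(total_timesteps=NUM_AGENTS * 100)  # same default budget as train.py
+model.save("ppo_money_model")
+```
+
+`server.py` replays a pre-trained checkpoint in the browser (see the `model_path` it loads at the top of the file).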
+
+
+
\ No newline at end of file
diff --git a/rl/boltzmann_money/model.py b/rl/boltzmann_money/model.py
new file mode 100644
index 00000000..75479b0b
--- /dev/null
+++ b/rl/boltzmann_money/model.py
@@ -0,0 +1,161 @@
+"""
+This code implements a multi-agent Boltzmann wealth model using the Mesa library.
+The model simulates the distribution of wealth among agents in a grid environment.
+Each agent has a randomly assigned wealth and can move to neighboring cells.
+An agent can also take money from a wealthier agent sharing its cell.
+The model is trained by a scientist who believes in an equal society and wants to minimize the Gini coefficient, which measures wealth inequality.
+The model is trained using the Proximal Policy Optimization (PPO) algorithm from the stable-baselines3 library.
+The trained model is saved as "ppo_money_model".
+"""
+
+import random
+
+import gymnasium
+import matplotlib.pyplot as plt
+
+# Import mesa
+import mesa
+
+# Import necessary libraries
+import numpy as np
+import seaborn as sns
+from mesa_models.boltzmann_wealth_model.model import (
+ BoltzmannWealthModel,
+ MoneyAgent,
+ compute_gini,
+)
+
+NUM_AGENTS = 10
+
+
+# Define the agent class
+class MoneyAgentRL(MoneyAgent):
+ def __init__(self, unique_id, model):
+ super().__init__(unique_id, model)
+ self.wealth = np.random.randint(1, NUM_AGENTS)
+
+ def move(self, action):
+ empty_neighbors = self.model.grid.get_neighborhood(
+ self.pos, moore=True, include_center=False
+ )
+
+ # Define the movement deltas
+ moves = {
+ 0: (1, 0), # Move right
+ 1: (-1, 0), # Move left
+ 2: (0, -1), # Move up
+ 3: (0, 1), # Move down
+ 4: (0, 0), # Stay in place
+ }
+
+ # Get the delta for the action, defaulting to (0, 0) if the action is invalid
+ dx, dy = moves.get(int(action), (0, 0))
+
+ # Calculate the new position and wrap around the grid
+ new_position = (
+ (self.pos[0] + dx) % self.model.grid.width,
+ (self.pos[1] + dy) % self.model.grid.height,
+ )
+
+ # Move the agent if the new position is in empty_neighbors
+ if new_position in empty_neighbors:
+ self.model.grid.move_agent(self, new_position)
+
+ def take_money(self):
+ # Get all agents in the same cell
+ cellmates = self.model.grid.get_cell_list_contents([self.pos])
+ if len(cellmates) > 1:
+ # Choose a random agent from the cellmates
+ other_agent = random.choice(cellmates)
+ if other_agent.wealth > self.wealth:
+ # Transfer money from other_agent to self
+ other_agent.wealth -= 1
+ self.wealth += 1
+
+ def step(self):
+ # Get the action for the agent
+ action = self.model.action_dict[self.unique_id]
+ # Move the agent based on the action
+ self.move(action)
+ # Take money from other agents in the same cell
+ self.take_money()
+
+
+# Define the model class
+class BoltzmannWealthModelRL(BoltzmannWealthModel, gymnasium.Env):
+ def __init__(self, N, width, height):
+ super().__init__(N, width, height)
+ # Define the observation and action space for the RL model
+ # The observation space is the wealth of each agent and their position
+ self.observation_space = gymnasium.spaces.Box(low=0, high=10 * N, shape=(N, 3))
+ # The action space is a MultiDiscrete space with 5 possible actions for each agent
+ self.action_space = gymnasium.spaces.MultiDiscrete([5] * N)
+ self.is_visualize = False
+
+ def step(self, action):
+ self.action_dict = action
+ # Perform one step of the model
+ self.schedule.step()
+ # Collect data for visualization
+ self.datacollector.collect(self)
+ # Compute the new Gini coefficient
+ new_gini = compute_gini(self)
+ # Compute the reward based on the change in Gini coefficient
+ reward = self.calculate_reward(new_gini)
+ self.prev_gini = new_gini
+ # Get the observation for the RL model
+ obs = self._get_obs()
+ if self.schedule.time > 5 * NUM_AGENTS:
+ # Terminate the episode if the model has run for a certain number of timesteps
+ done = True
+ reward = -1
+ elif new_gini < 0.1:
+ # Terminate the episode if the Gini coefficient is below a certain threshold
+ done = True
+ reward = 50 / self.schedule.time
+ else:
+ done = False
+ info = {}
+ truncated = False
+ return obs, reward, done, truncated, info
+
+ def calculate_reward(self, new_gini):
+ if new_gini < self.prev_gini:
+ # Compute the reward based on the decrease in Gini coefficient
+ reward = (self.prev_gini - new_gini) * 20
+ else:
+ # Penalize for increase in Gini coefficient
+ reward = -0.05
+ self.prev_gini = new_gini
+ return reward
+
+ def visualize(self):
+ # Visualize the Gini coefficient over time
+ gini = self.datacollector.get_model_vars_dataframe()
+ g = sns.lineplot(data=gini)
+ g.set(title="Gini Coefficient over Time", ylabel="Gini Coefficient")
+ plt.show()
+
+ def reset(self, *, seed=None, options=None):
+ if self.is_visualize:
+ # Visualize the Gini coefficient before resetting the model
+ self.visualize()
+ super().reset()
+ self.grid = mesa.space.MultiGrid(self.grid.width, self.grid.height, True)
+ self.schedule = mesa.time.RandomActivation(self)
+ for i in range(self.num_agents):
+ # Create MoneyAgentRL instances and add them to the schedule
+ a = MoneyAgentRL(i, self)
+ self.schedule.add(a)
+ x = self.random.randrange(self.grid.width)
+ y = self.random.randrange(self.grid.height)
+ self.grid.place_agent(a, (x, y))
+ self.prev_gini = compute_gini(self)
+ return self._get_obs(), {}
+
+ def _get_obs(self):
+ # The observation is the wealth of each agent and their position
+ obs = []
+ for a in self.schedule.agents:
+ obs.append([a.wealth, *list(a.pos)])
+ return np.array(obs)
diff --git a/rl/boltzmann_money/ppo_agent.gif b/rl/boltzmann_money/ppo_agent.gif
new file mode 100644
index 00000000..ac2d2520
Binary files /dev/null and b/rl/boltzmann_money/ppo_agent.gif differ
diff --git a/rl/boltzmann_money/server.py b/rl/boltzmann_money/server.py
new file mode 100644
index 00000000..8dbbc1e7
--- /dev/null
+++ b/rl/boltzmann_money/server.py
@@ -0,0 +1,69 @@
+import os
+
+import mesa
+from mesa.visualization.ModularVisualization import ModularServer
+from mesa.visualization.modules import ChartModule
+from model import BoltzmannWealthModelRL
+from stable_baselines3 import PPO
+
+
+# Modify the MoneyModel class to take actions from the RL model
+class MoneyModelRL(BoltzmannWealthModelRL):
+ def __init__(self, N, width, height):
+ super().__init__(N, width, height)
+ model_path = os.path.join(
+ os.path.dirname(__file__), "..", "model", "boltzmann_money.zip"
+ )
+ self.rl_model = PPO.load(model_path)
+ self.reset()
+
+ def step(self):
+ # Collect data
+ self.datacollector.collect(self)
+
+        # Get the observations, i.e. the wealth and position of each agent
+ obs = self._get_obs()
+
+ action, _states = self.rl_model.predict(obs)
+ self.action_dict = action
+ self.schedule.step()
+
+
+# Define the agent portrayal with different colors for different wealth levels
+def agent_portrayal(agent):
+ if agent.wealth > 10:
+ color = "purple"
+ elif agent.wealth > 7:
+ color = "red"
+ elif agent.wealth > 5:
+ color = "orange"
+ elif agent.wealth > 3:
+ color = "yellow"
+ else:
+ color = "blue"
+
+ portrayal = {
+ "Shape": "circle",
+ "Filled": "true",
+ "Layer": 0,
+ "Color": color,
+ "r": 0.5,
+ }
+ return portrayal
+
+
+if __name__ == "__main__":
+ # Define a grid visualization
+ grid = mesa.visualization.CanvasGrid(agent_portrayal, 10, 10, 500, 500)
+
+ # Define a chart visualization
+ chart = ChartModule(
+ [{"Label": "Gini", "Color": "Black"}], data_collector_name="datacollector"
+ )
+
+ # Create a modular server
+ server = ModularServer(
+ MoneyModelRL, [grid, chart], "Money Model", {"N": 10, "width": 10, "height": 10}
+ )
+ server.port = 8521 # The default
+ server.launch()
diff --git a/rl/boltzmann_money/train.py b/rl/boltzmann_money/train.py
new file mode 100644
index 00000000..cd3e1c24
--- /dev/null
+++ b/rl/boltzmann_money/train.py
@@ -0,0 +1,35 @@
+import argparse
+
+from model import NUM_AGENTS, BoltzmannWealthModelRL
+from stable_baselines3 import PPO
+from stable_baselines3.common.callbacks import EvalCallback
+
+
+def rl_model(args):
+ # Create the environment
+ env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
+ eval_env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
+ eval_callback = EvalCallback(
+ eval_env, best_model_save_path="./logs/", log_path="./logs/", eval_freq=5000
+ )
+ # Define the PPO model
+ model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="./logs/")
+
+ # Train the model
+ model.learn(total_timesteps=args.stop_timesteps, callback=[eval_callback])
+
+ # Save the model
+ model.save("ppo_money_model")
+
+
+if __name__ == "__main__":
+ # Define the command line arguments
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--stop-timesteps",
+ type=int,
+ default=NUM_AGENTS * 100,
+ help="Number of timesteps to train.",
+ )
+ args = parser.parse_args()
+ rl_model(args)
diff --git a/rl/wolf_sheep/README.md b/rl/wolf_sheep/README.md
new file mode 100644
index 00000000..d877fc02
--- /dev/null
+++ b/rl/wolf_sheep/README.md
@@ -0,0 +1,33 @@
+# Collaborative Survival: Wolf-Sheep Predation Model
+
+This project demonstrates the use of the RLlib library to implement Multi-Agent Reinforcement Learning (MARL) in the classic Wolf-Sheep predation problem. The environment details can be found on the Mesa project's GitHub repository [here](https://github.com/projectmesa/mesa-examples/tree/main/examples/wolf_sheep).
+
+## Key Features
+
+**RLlib and Multi-Agent Learning**:
+- **Library Utilized**: The project leverages the RLlib library to concurrently train two independent PPO (Proximal Policy Optimization) policies, one shared by all wolves and one shared by all sheep (a training sketch follows the list below).
+- **Agents**:
+  - **Wolf**: Predatory agent that survives by eating sheep
+  - **Sheep**: Prey agent that survives by eating grass
+ - **Grass**: Grass is eaten by sheep and regrows with time
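+
+No training driver script is included in this folder; the configuration in `train_config.py` is meant to be passed to RLlib. A minimal sketch of one way to do that with RLlib's `PPOConfig` (the package-style import and the iteration count are assumptions, not part of this PR):
+
+```python
+from ray import tune
+from ray.rllib.algorithms.ppo import PPOConfig
+
+from rl.wolf_sheep.train_config import config  # assumes the `rl` folder is importable as a package
+
+# Register the multi-agent Mesa environment under the name used in train_config.py
+tune.register_env(config["env_name"], config["env_creator"])
+
+ppo_config = (
+    PPOConfig()
+    .environment(config["env_name"])
+    .framework(config["framework"])
+    .training(train_batch_size=config["train_batch_size"])
+    .multi_agent(
+        policies=config["policies"],
+        policy_mapping_fn=config["policy_mapping_fn"],
+        policies_to_train=config["policies_to_train"],
+    )
+)
+
+algo = ppo_config.build()
+for _ in range(10):  # the number of iterations here is an arbitrary placeholder
+    algo.train()
+checkpoint = algo.save()  # the checkpoint can later be replayed via server.py
+```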
+
+**Input and Observation Space**:
+- **Observation Grid**: Each agent's policy receives the Moore neighborhood of radius `vision` around its position (4 by default, i.e. a 9x9 block of cells excluding the agent's own cell) as input; a sketch of the resulting observation follows this list.
+  - **Grid Details**: For every cell in that neighborhood, the observation encodes the presence of sheep, wolves, and fully grown grass.
+  - **Agent's Energy Level**: The agent's current energy level is also included in the observation.
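+
+For reference, a single agent's observation is a dictionary matching the `observation_space` defined in `model.py`; a sketch of its shape with the default `vision=4` (the values shown are placeholders):
+
+```python
+import numpy as np
+
+# One entry of the observation dict returned by WolfSheepRL.reset() / step(),
+# assuming the default vision of 4: (2 * 4 + 1) ** 2 - 1 = 80 neighboring cells
+obs_for_one_agent = {
+    # per cell: [sheep present, wolf present, fully grown grass present]
+    "grid": np.zeros((80, 3), dtype=int),
+    # the agent's current energy level
+    "energy": np.array([20.0], dtype=np.float32),
+}
+```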
+
+**Action Space**:
+- **Action Space**: Each agent chooses one of four discrete moves (right, left, up, or down); positions wrap around the grid edges.
+
+**Behavior and Training Outcomes**:
+- **Optimal Behavior**:
+ - **Wolf**: Learns to move towards the nearest sheep.
+ - **Sheep**: Learns to run away from wolves and is attracted to grass.
+- **Density Variations**: You can vary the densities of sheep and wolves to observe different results.
+
+By leveraging RLlib and Multi-Agent Learning, this project provides insights into the dynamics of predator-prey relationships and optimal behavior strategies in a simulated environment.
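+
+To replay trained policies in the Mesa browser visualization, a minimal sketch built on `run_model` from `server.py` (the checkpoint path is a placeholder, and the package-style import assumes the `rl` folder is importable):
+
+```python
+from rl.wolf_sheep.server import run_model
+
+# model_path must point to an RLlib Algorithm checkpoint that contains
+# the "policy_wolf" and "policy_sheep" policies saved during training
+server = run_model(height=20, width=20, model_path="path/to/checkpoint")
+server.launch()
+```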
+
+
+
+
+
\ No newline at end of file
diff --git a/rl/wolf_sheep/agents.py b/rl/wolf_sheep/agents.py
new file mode 100644
index 00000000..c6e5e959
--- /dev/null
+++ b/rl/wolf_sheep/agents.py
@@ -0,0 +1,80 @@
+from mesa_models.wolf_sheep.agents import GrassPatch, Sheep, Wolf
+
+from .utility import move
+
+
+class SheepRL(Sheep):
+ def step(self):
+ """
+        The code is exactly the same as the mesa-examples version, with the only differences being the move function and the class used to create new sheep.
+ Link : https://github.com/projectmesa/mesa-examples/blob/main/examples/wolf_sheep/wolf_sheep/agents.py
+ """
+ action = self.model.action_dict[self.unique_id]
+ move(self, action)
+
+ living = True
+
+ if self.model.grass:
+ # Reduce energy
+ self.energy -= 1
+
+ # If there is grass available, eat it
+ this_cell = self.model.grid.get_cell_list_contents([self.pos])
+ grass_patch = next(obj for obj in this_cell if isinstance(obj, GrassPatch))
+ if grass_patch.fully_grown:
+ self.energy += self.model.sheep_gain_from_food
+ grass_patch.fully_grown = False
+
+ # Death
+ if self.energy < 0:
+ self.model.grid.remove_agent(self)
+ self.model.schedule.remove(self)
+ living = False
+
+ if living and self.random.random() < self.model.sheep_reproduce:
+ # Create a new sheep:
+ if self.model.grass:
+ self.energy /= 2
+ unique_id_str = f"sheep_{self.model.next_id()}"
+ lamb = SheepRL(unique_id_str, self.pos, self.model, self.moore, self.energy)
+ self.model.grid.place_agent(lamb, self.pos)
+ self.model.schedule.add(lamb)
+
+
+class WolfRL(Wolf):
+ def step(self):
+ """
+        The code is exactly the same as the mesa-examples version, with the only differences being the move function and the class used to create new wolves.
+ Link : https://github.com/projectmesa/mesa-examples/blob/main/examples/wolf_sheep/wolf_sheep/agents.py
+ """
+ action = self.model.action_dict[self.unique_id]
+ move(self, action)
+
+ self.energy -= 1
+
+ # If there are sheep present, eat one
+ x, y = self.pos
+ this_cell = self.model.grid.get_cell_list_contents([self.pos])
+ sheep = [obj for obj in this_cell if isinstance(obj, Sheep)]
+ if len(sheep) > 0:
+ sheep_to_eat = self.random.choice(sheep)
+ self.energy += self.model.wolf_gain_from_food
+
+ # Kill the sheep
+ self.model.grid.remove_agent(sheep_to_eat)
+ self.model.schedule.remove(sheep_to_eat)
+
+ # Death or reproduction
+ if self.energy < 0:
+ self.model.grid.remove_agent(self)
+ self.model.schedule.remove(self)
+ else:
+ if self.random.random() < self.model.wolf_reproduce:
+ # Create a new wolf cub
+ self.energy /= 2
+ unique_id_str = f"wolf_{self.model.next_id()}"
+ cub = WolfRL(
+ unique_id_str, self.pos, self.model, self.moore, self.energy
+ )
+ self.model.grid.place_agent(cub, cub.pos)
+ self.model.schedule.add(cub)
diff --git a/rl/wolf_sheep/model.py b/rl/wolf_sheep/model.py
new file mode 100644
index 00000000..ee580c56
--- /dev/null
+++ b/rl/wolf_sheep/model.py
@@ -0,0 +1,178 @@
+import gymnasium as gym
+import mesa
+import numpy as np
+from mesa_models.wolf_sheep.agents import GrassPatch
+from mesa_models.wolf_sheep.model import WolfSheep
+from mesa_models.wolf_sheep.scheduler import RandomActivationByTypeFiltered
+from ray.rllib.env import MultiAgentEnv
+
+from .agents import SheepRL, WolfRL
+from .utility import create_intial_agents, grid_to_observation
+
+
+class WolfSheepRL(WolfSheep, MultiAgentEnv):
+ """
+    Wolf-Sheep predation model wrapped as an RLlib MultiAgentEnv.
+ """
+
+ def __init__(
+ self,
+ width=20,
+ height=20,
+ initial_sheep=100,
+ initial_wolves=25,
+ sheep_reproduce=0.04,
+ wolf_reproduce=0.05,
+ wolf_gain_from_food=20,
+ grass=True,
+ grass_regrowth_time=30,
+ sheep_gain_from_food=4,
+ vision=4,
+ ):
+ """
+ Create a new WolfRL-Sheep model with the given parameters.
+ """
+ super().__init__(
+ width,
+ height,
+ initial_sheep,
+ initial_wolves,
+ sheep_reproduce,
+ wolf_reproduce,
+ wolf_gain_from_food,
+ grass,
+ grass_regrowth_time,
+ sheep_gain_from_food,
+ )
+ # Defining RL specific attributes
+ self.vision = vision
+ # The observation space is a dictionary containing the grid and energy of the agent
+ self.observation_space = gym.spaces.Dict(
+ {
+ "grid": gym.spaces.Box(
+ low=0, high=1, shape=((self.vision * 2 + 1) ** 2 - 1, 3), dtype=int
+ ), # 3 for sheep, wolf, grass
+ "energy": gym.spaces.Box(
+ low=-1, high=np.inf, shape=(1,), dtype=np.float32
+ ),
+ }
+ )
+        # The action space is a discrete space with 4 actions: move right, left, up, or down
+ self.action_space = gym.spaces.Discrete(4)
+ self.max_steps = 500
+ self.datacollector = mesa.DataCollector(
+ {
+ "Wolves": lambda m: m.schedule.get_type_count(WolfRL),
+ "Sheep": lambda m: m.schedule.get_type_count(SheepRL),
+ "Grass": lambda m: m.schedule.get_type_count(
+ GrassPatch, lambda x: x.fully_grown
+ ),
+ }
+ )
+
+ def step(self, action_dict):
+ self.action_dict = action_dict
+ self.schedule.step()
+ self.datacollector.collect(self)
+
+ # Get rewards
+ rewards = self.cal_reward()
+
+ # Get observations
+        # The grid is converted to a matrix, and each agent's neighborhood is then extracted
+ grid_to_observation(self, SheepRL, WolfRL, GrassPatch)
+ obs = {}
+ for agent in self.schedule.agents:
+ if isinstance(agent, (SheepRL, WolfRL)):
+ neighbors = agent.model.grid.get_neighborhood(
+ agent.pos, moore=True, radius=self.vision
+ )
+
+ obs[agent.unique_id] = {
+ "grid": np.array(
+ [
+ self.obs_grid[neighbor[0]][neighbor[1]]
+ for neighbor in neighbors
+ ]
+ ),
+ "energy": np.array([agent.energy]),
+ }
+
+        # Per-agent done flags; the episode-level "__all__" flag is set below
+ done = {
+ a.unique_id: False
+ for a in self.schedule.agents
+ if isinstance(a, (SheepRL, WolfRL))
+ }
+
+ # Check if either wolves or sheep are extinct
+ if (
+ self.schedule.get_type_count(WolfRL) == 0
+ or self.schedule.get_type_count(SheepRL) == 0
+ or self.schedule.time > self.max_steps
+ ):
+ done["__all__"] = True
+ else:
+ done["__all__"] = False
+
+        # Prepare the truncated dictionary
+ truncated = {
+ a.unique_id: False
+ for a in self.schedule.agents
+ if isinstance(a, (SheepRL, WolfRL))
+ }
+ truncated["__all__"] = np.all(list(truncated.values()))
+
+        # Agents that died during this step are marked as done and given a penalty
+        sample = next(iter(obs.values()))
+        for agent_id in action_dict:
+            if agent_id not in rewards:
+                done[agent_id] = True
+                rewards[agent_id] = -20
+                truncated[agent_id] = False
+                # Give the dead agent a zeroed-out observation and an energy of -1
+                obs[agent_id] = {
+                    "grid": np.zeros_like(sample["grid"]),
+                    "energy": np.array([-1]),
+                }
+
+ return obs, rewards, done, truncated, {}
+
+ def cal_reward(self):
+ rewards = {}
+ # Calculate rewards
+ # Agents are rewarded for being alive and having energy
+ for agent in self.schedule.agents:
+ if isinstance(agent, (SheepRL, WolfRL)):
+ if isinstance(agent, SheepRL):
+ rewards[agent.unique_id] = min(4, agent.energy - 4)
+ else:
+ rewards[agent.unique_id] = min(4, agent.energy / 5 - 4)
+ return rewards
+
+ def reset(self, *, seed=None, options=None):
+ # Reset your environment here
+ super().reset()
+ self.schedule = RandomActivationByTypeFiltered(self)
+ self.grid = mesa.space.MultiGrid(self.width, self.height, torus=True)
+ self.current_id = 0
+ create_intial_agents(self, SheepRL, WolfRL, GrassPatch)
+ grid_to_observation(self, SheepRL, WolfRL, GrassPatch)
+ obs = {}
+ for agent in self.schedule.agents:
+ if isinstance(agent, (SheepRL, WolfRL)):
+ neighbors = agent.model.grid.get_neighborhood(
+ agent.pos, moore=True, radius=self.vision
+ )
+
+ obs[agent.unique_id] = {
+ "grid": np.array(
+ [
+ self.obs_grid[neighbor[0]][neighbor[1]]
+ for neighbor in neighbors
+ ]
+ ),
+ "energy": np.array([agent.energy]),
+ }
+ return obs, {}
diff --git a/rl/wolf_sheep/resources/sheep.png b/rl/wolf_sheep/resources/sheep.png
new file mode 100644
index 00000000..dfb81b0e
Binary files /dev/null and b/rl/wolf_sheep/resources/sheep.png differ
diff --git a/rl/wolf_sheep/resources/wolf.png b/rl/wolf_sheep/resources/wolf.png
new file mode 100644
index 00000000..5357b855
Binary files /dev/null and b/rl/wolf_sheep/resources/wolf.png differ
diff --git a/rl/wolf_sheep/resources/wolf_sheep.gif b/rl/wolf_sheep/resources/wolf_sheep.gif
new file mode 100644
index 00000000..08d3e589
Binary files /dev/null and b/rl/wolf_sheep/resources/wolf_sheep.gif differ
diff --git a/rl/wolf_sheep/server.py b/rl/wolf_sheep/server.py
new file mode 100644
index 00000000..f21c54c0
--- /dev/null
+++ b/rl/wolf_sheep/server.py
@@ -0,0 +1,190 @@
+import os
+
+import mesa
+import numpy as np
+from mesa_models.wolf_sheep.agents import GrassPatch
+from ray import tune
+from ray.rllib.algorithms.algorithm import Algorithm
+
+from .agents import SheepRL, WolfRL
+from .model import WolfSheepRL
+from .utility import grid_to_observation
+
+
+class WolfSheepServer(WolfSheepRL):
+ def __init__(
+ self,
+ width=20,
+ height=20,
+ initial_sheep=100,
+ initial_wolves=25,
+ sheep_reproduce=0.04,
+ wolf_reproduce=0.05,
+ wolf_gain_from_food=20,
+ grass=True,
+ grass_regrowth_time=30,
+ sheep_gain_from_food=4,
+ model_path=None,
+ ):
+ super().__init__(
+ width,
+ height,
+ initial_sheep,
+ initial_wolves,
+ sheep_reproduce,
+ wolf_reproduce,
+ wolf_gain_from_food,
+ grass,
+ grass_regrowth_time,
+ sheep_gain_from_food,
+ )
+
+ def env_creator(_):
+ return WolfSheepRL(
+ width,
+ height,
+ initial_sheep,
+ initial_wolves,
+ sheep_reproduce,
+ wolf_reproduce,
+ wolf_gain_from_food,
+ grass,
+ grass_regrowth_time,
+ sheep_gain_from_food,
+ )
+
+ tune.register_env("WorldSheepModel-v0", env_creator)
+ self.iteration = 0
+ # Load the model from checkpoint
+ checkpoint_path = model_path
+ algo = Algorithm.from_checkpoint(checkpoint_path)
+ self.wolf_policy = algo.get_policy("policy_wolf")
+ self.sheep_policy = algo.get_policy("policy_sheep")
+
+ def step(self):
+ if self.iteration == 0:
+ self.reset()
+ self.datacollector.collect(self)
+ # Get the observation for each agent
+ grid_to_observation(self, SheepRL, WolfRL, GrassPatch)
+ obs = {}
+ for agent in self.schedule.agents:
+ if isinstance(agent, (SheepRL, WolfRL)):
+ neighbors = agent.model.grid.get_neighborhood(
+ agent.pos, moore=True, radius=self.vision
+ )
+ obs[agent.unique_id] = {
+ "grid": np.array(
+ [
+ self.obs_grid[neighbor[0]][neighbor[1]]
+ for neighbor in neighbors
+ ]
+ ),
+ "energy": np.array([agent.energy]),
+ }
+ action_dict = {}
+ # Get the action for each agent
+ for agent in self.schedule.agents:
+ if isinstance(agent, SheepRL):
+ action_dict[agent.unique_id] = self.sheep_policy.compute_single_action(
+ obs[agent.unique_id], explore=False
+ )[0]
+ elif isinstance(agent, WolfRL):
+ action_dict[agent.unique_id] = self.wolf_policy.compute_single_action(
+ obs[agent.unique_id], explore=False
+ )[0]
+ self.action_dict = action_dict
+ # Take a step in the environment
+ self.schedule.step()
+ self.iteration += 1
+ if (
+ self.schedule.get_type_count(WolfRL) == 0
+ or self.schedule.get_type_count(SheepRL) == 0
+ or self.schedule.time > self.max_steps
+ ):
+ self.running = False
+
+
+def wolf_sheep_portrayal(agent):
+ if agent is None:
+ return
+
+ portrayal = {}
+ file_path = os.path.dirname(os.path.abspath(__file__))
+ resources_path = os.path.join(file_path, "resources")
+
+ if type(agent) is SheepRL:
+ portrayal["Shape"] = os.path.join(resources_path, "sheep.png")
+ portrayal["scale"] = 0.9
+ portrayal["Layer"] = 1
+
+ elif type(agent) is WolfRL:
+ portrayal["Shape"] = os.path.join(resources_path, "wolf.png")
+ portrayal["scale"] = 0.9
+ portrayal["Layer"] = 2
+ portrayal["text"] = round(agent.energy, 1)
+ portrayal["text_color"] = "White"
+
+ elif type(agent) is GrassPatch:
+ portrayal["Color"] = (
+ ["#00FF00", "#00CC00", "#009900"]
+ if agent.fully_grown
+ else ["#84e184", "#adebad", "#d6f5d6"]
+ )
+ portrayal["Shape"] = "rect"
+ portrayal["Filled"] = "true"
+ portrayal["Layer"] = 0
+ portrayal["w"] = 1
+ portrayal["h"] = 1
+ return portrayal
+
+
+canvas_element = mesa.visualization.CanvasGrid(wolf_sheep_portrayal, 20, 20, 500, 500)
+chart_element = mesa.visualization.ChartModule(
+ [
+ {"Label": "Wolves", "Color": "#AA0000"},
+ {"Label": "Sheep", "Color": "#666666"},
+ {"Label": "Grass", "Color": "#00AA00"},
+ ]
+)
+
+model_params = {
+ "height": 20,
+ "width": 20,
+ "model_path": None,
+ "title": mesa.visualization.StaticText("Parameters:"),
+ "grass": mesa.visualization.Checkbox("Grass Enabled", True),
+ "grass_regrowth_time": mesa.visualization.Slider("Grass Regrowth Time", 20, 1, 50),
+ "initial_sheep": mesa.visualization.Slider(
+ "Initial Sheep Population", 100, 10, 300
+ ),
+ "sheep_reproduce": mesa.visualization.Slider(
+ "Sheep Reproduction Rate", 0.04, 0.01, 1.0, 0.01
+ ),
+ "initial_wolves": mesa.visualization.Slider("Initial Wolf Population", 25, 10, 300),
+ "wolf_reproduce": mesa.visualization.Slider(
+ "Wolf Reproduction Rate",
+ 0.05,
+ 0.01,
+ 1.0,
+ 0.01,
+ description="The rate at which wolf agents reproduce.",
+ ),
+ "wolf_gain_from_food": mesa.visualization.Slider(
+ "Wolf Gain From Food Rate", 20, 1, 50
+ ),
+ "sheep_gain_from_food": mesa.visualization.Slider("Sheep Gain From Food", 4, 1, 10),
+}
+
+
+def run_model(height=20, width=20, model_path=None):
+ model_params["height"] = height
+ model_params["width"] = width
+ model_params["model_path"] = model_path
+ server = mesa.visualization.ModularServer(
+ WolfSheepServer,
+ [canvas_element, chart_element],
+ "Wolf Sheep Predation",
+ model_params,
+ )
+ return server
diff --git a/rl/wolf_sheep/train_config.py b/rl/wolf_sheep/train_config.py
new file mode 100644
index 00000000..f3c4fdb7
--- /dev/null
+++ b/rl/wolf_sheep/train_config.py
@@ -0,0 +1,45 @@
+import os
+
+from ray.rllib.algorithms.ppo import PPOConfig
+from ray.rllib.policy.policy import PolicySpec
+
+from .model import WolfSheepRL
+
+
+# Configuration to train the model
+# Feel free to adjust the configuration as necessary
+def env_creator(_):
+ return WolfSheepRL(
+ width=20,
+ height=20,
+ initial_sheep=100,
+ initial_wolves=25,
+ sheep_reproduce=0.04,
+ wolf_reproduce=0.05,
+ wolf_gain_from_food=20,
+ grass=True,
+ grass_regrowth_time=30,
+ sheep_gain_from_food=4,
+ )
+
+
+config = {
+ "env_name": "WorldSheepModel-v0",
+ "env_creator": env_creator,
+ "framework": "torch", # Assuming you want to use PyTorch
+ "train_batch_size": 150, # Assuming a default value, adjust as necessary
+ "policies": {
+ "policy_sheep": PolicySpec(config=PPOConfig.overrides(framework_str="torch")),
+ "policy_wolf": PolicySpec(config=PPOConfig.overrides(framework_str="torch")),
+ },
+ "policy_mapping_fn": lambda agent_id, *args, **kwargs: "policy_sheep"
+ if agent_id[0:5] == "sheep"
+ else "policy_wolf",
+ "policies_to_train": ["policy_sheep", "policy_wolf"],
+ "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "1")),
+ "num_learners": 50, # Assuming a default value, adjust as necessary
+ "num_env_runners": 20, # Assuming a default value, adjust as necessary
+ "num_envs_per_env_runner": 1, # Assuming a default value, adjust as necessary
+ "batch_mode": "truncate_episodes", # Assuming a default value, adjust as necessary
+ "rollout_fragment_length": "auto", # Assuming a default value, adjust as necessary
+}
diff --git a/rl/wolf_sheep/utility.py b/rl/wolf_sheep/utility.py
new file mode 100644
index 00000000..b65a49ee
--- /dev/null
+++ b/rl/wolf_sheep/utility.py
@@ -0,0 +1,80 @@
+def create_intial_agents(self, SheepRL, WolfRL, GrassPatch):
+ # Create sheep:
+ for i in range(self.initial_sheep):
+ x = self.random.randrange(self.width)
+ y = self.random.randrange(self.height)
+ energy = self.random.randrange(2 * self.sheep_gain_from_food)
+ unique_id_str = f"sheep_{self.next_id()}"
+ sheep = SheepRL(unique_id_str, None, self, True, energy)
+ self.grid.place_agent(sheep, (x, y))
+ self.schedule.add(sheep)
+
+ # Create wolves
+ for i in range(self.initial_wolves):
+ x = self.random.randrange(self.width)
+ y = self.random.randrange(self.height)
+ energy = self.random.randrange(2 * self.wolf_gain_from_food)
+ unique_id_str = f"wolf_{self.next_id()}"
+ wolf = WolfRL(unique_id_str, None, self, True, energy)
+ self.grid.place_agent(wolf, (x, y))
+ self.schedule.add(wolf)
+
+ # Create grass patches
+ if self.grass:
+ for agent, (x, y) in self.grid.coord_iter():
+ fully_grown = self.random.choice([True, False])
+
+ if fully_grown:
+ countdown = self.grass_regrowth_time
+ else:
+ countdown = self.random.randrange(self.grass_regrowth_time)
+
+ unique_id_str = f"grass_{self.next_id()}"
+ patch = GrassPatch(unique_id_str, None, self, fully_grown, countdown)
+ self.grid.place_agent(patch, (x, y))
+ self.schedule.add(patch)
+
+
+def move(self, action):
+ empty_neighbors = self.model.grid.get_neighborhood(
+ self.pos, moore=True, include_center=False
+ )
+
+ # Define the movement deltas
+ moves = {
+ 0: (1, 0), # Move right
+ 1: (-1, 0), # Move left
+ 2: (0, -1), # Move up
+ 3: (0, 1), # Move down
+ }
+
+ # Get the delta for the action, defaulting to (0, 0) if the action is invalid
+ dx, dy = moves.get(int(action), (0, 0))
+
+ # Calculate the new position and wrap around the grid
+ new_position = (
+ (self.pos[0] + dx) % self.model.grid.width,
+ (self.pos[1] + dy) % self.model.grid.height,
+ )
+
+ # Move the agent if the new position is in empty_neighbors
+ if new_position in empty_neighbors:
+ self.model.grid.move_agent(self, new_position)
+
+
+def grid_to_observation(self, SheepRL, WolfRL, GrassPatch):
+ # Convert grid to matrix for better representation
+ self.obs_grid = []
+ for i in self.grid._grid:
+ row = []
+ for j in i:
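+            # Per-cell encoding: [sheep present, wolf present, fully grown grass]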
+ value = [0, 0, 0]
+ for agent in j:
+ if isinstance(agent, SheepRL):
+ value[0] = 1
+ elif isinstance(agent, WolfRL):
+ value[1] = 1
+ elif isinstance(agent, GrassPatch) and agent.fully_grown:
+ value[2] = 1
+ row.append(value)
+ self.obs_grid.append(row)