diff --git a/rl/boltzmann_money/README.md b/rl/boltzmann_money/README.md
new file mode 100644
index 00000000..195abe39
--- /dev/null
+++ b/rl/boltzmann_money/README.md
@@ -0,0 +1,15 @@
+# Balancing Wealth Inequality
+This folder showcases how to solve the Boltzmann wealth model with Proximal Policy Optimization (PPO) from Stable-Baselines3.
+
+## Key features:
+
+- Boltzmann Wealth Model: Agents with varying wealth navigate a grid, aiming to minimize inequality as measured by the Gini coefficient.
+- PPO Training: A PPO agent is trained to achieve this goal, receiving sparse rewards based on improvements in the Gini coefficient and a large terminal reward for reaching low inequality.
+- Mesa Data Collection and Visualization: The Mesa data collector tracks Gini values during training, allowing for real-time visualization.
+- Visualization Script: Visualize the trained agent's behavior with Mesa's visualization tools, showing agent movement and Gini values on the grid. You can run the `server.py` file to test it with a pre-trained model.
+
+## Model Behaviour
+Because Stable-Baselines3 controls every agent with the same policy weights, the agents learn to move towards a corner of the grid. This brings all the agents together, allowing money to be exchanged between them and maximizing the reward.
+
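## Training

`train.py` in this folder wraps the environment in Stable-Baselines3's PPO. A minimal sketch of the same idea (assuming `stable-baselines3` is installed and `model.py` is importable from the working directory):

```python
# Minimal training sketch; see train.py for the full version with callbacks.
from model import NUM_AGENTS, BoltzmannWealthModelRL
from stable_baselines3 import PPO

# The grid is sized to match the number of agents, as in train.py.
env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=NUM_AGENTS * 100)  # default budget used by train.py
model.save("ppo_money_model")
```

Running `python train.py` performs the same steps with an evaluation callback and TensorBoard logging under `./logs/`.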

+ +
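A trained checkpoint can also be evaluated without the browser front end. A rough loop along these lines (the checkpoint name `ppo_money_model` matches what `train.py` saves; adjust the path if yours differs):

```python
# Headless evaluation sketch for a trained checkpoint.
from model import NUM_AGENTS, BoltzmannWealthModelRL
from stable_baselines3 import PPO

env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
rl_model = PPO.load("ppo_money_model")  # path saved by train.py; adjust if needed

obs, _ = env.reset()
done = False
while not done:
    action, _ = rl_model.predict(obs)
    obs, reward, done, truncated, info = env.step(action)

env.visualize()  # line plot of the Gini coefficient over the episode
```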

\ No newline at end of file diff --git a/rl/boltzmann_money/model.py b/rl/boltzmann_money/model.py new file mode 100644 index 00000000..75479b0b --- /dev/null +++ b/rl/boltzmann_money/model.py @@ -0,0 +1,161 @@ +""" +This code implements a multi-agent model called MoneyModel using the Mesa library. +The model simulates the distribution of wealth among agents in a grid environment. +Each agent has a randomly assigned wealth and can move to neighboring cells. +Agents can also give money to other agents in the same cell if they have greater wealth. +The model is trained by a scientist who believes in an equal society and wants to minimize the Gini coefficient, which measures wealth inequality. +The model is trained using the Proximal Policy Optimization (PPO) algorithm from the stable-baselines3 library. +The trained model is saved as "ppo_money_model". +""" + +import random + +import gymnasium +import matplotlib.pyplot as plt + +# Import mesa +import mesa + +# Import necessary libraries +import numpy as np +import seaborn as sns +from mesa_models.boltzmann_wealth_model.model import ( + BoltzmannWealthModel, + MoneyAgent, + compute_gini, +) + +NUM_AGENTS = 10 + + +# Define the agent class +class MoneyAgentRL(MoneyAgent): + def __init__(self, unique_id, model): + super().__init__(unique_id, model) + self.wealth = np.random.randint(1, NUM_AGENTS) + + def move(self, action): + empty_neighbors = self.model.grid.get_neighborhood( + self.pos, moore=True, include_center=False + ) + + # Define the movement deltas + moves = { + 0: (1, 0), # Move right + 1: (-1, 0), # Move left + 2: (0, -1), # Move up + 3: (0, 1), # Move down + 4: (0, 0), # Stay in place + } + + # Get the delta for the action, defaulting to (0, 0) if the action is invalid + dx, dy = moves.get(int(action), (0, 0)) + + # Calculate the new position and wrap around the grid + new_position = ( + (self.pos[0] + dx) % self.model.grid.width, + (self.pos[1] + dy) % self.model.grid.height, + ) + + # Move the agent if the new position is in empty_neighbors + if new_position in empty_neighbors: + self.model.grid.move_agent(self, new_position) + + def take_money(self): + # Get all agents in the same cell + cellmates = self.model.grid.get_cell_list_contents([self.pos]) + if len(cellmates) > 1: + # Choose a random agent from the cellmates + other_agent = random.choice(cellmates) + if other_agent.wealth > self.wealth: + # Transfer money from other_agent to self + other_agent.wealth -= 1 + self.wealth += 1 + + def step(self): + # Get the action for the agent + action = self.model.action_dict[self.unique_id] + # Move the agent based on the action + self.move(action) + # Take money from other agents in the same cell + self.take_money() + + +# Define the model class +class BoltzmannWealthModelRL(BoltzmannWealthModel, gymnasium.Env): + def __init__(self, N, width, height): + super().__init__(N, width, height) + # Define the observation and action space for the RL model + # The observation space is the wealth of each agent and their position + self.observation_space = gymnasium.spaces.Box(low=0, high=10 * N, shape=(N, 3)) + # The action space is a MultiDiscrete space with 5 possible actions for each agent + self.action_space = gymnasium.spaces.MultiDiscrete([5] * N) + self.is_visualize = False + + def step(self, action): + self.action_dict = action + # Perform one step of the model + self.schedule.step() + # Collect data for visualization + self.datacollector.collect(self) + # Compute the new Gini coefficient + new_gini = compute_gini(self) + # Compute the 
reward based on the change in Gini coefficient + reward = self.calculate_reward(new_gini) + self.prev_gini = new_gini + # Get the observation for the RL model + obs = self._get_obs() + if self.schedule.time > 5 * NUM_AGENTS: + # Terminate the episode if the model has run for a certain number of timesteps + done = True + reward = -1 + elif new_gini < 0.1: + # Terminate the episode if the Gini coefficient is below a certain threshold + done = True + reward = 50 / self.schedule.time + else: + done = False + info = {} + truncated = False + return obs, reward, done, truncated, info + + def calculate_reward(self, new_gini): + if new_gini < self.prev_gini: + # Compute the reward based on the decrease in Gini coefficient + reward = (self.prev_gini - new_gini) * 20 + else: + # Penalize for increase in Gini coefficient + reward = -0.05 + self.prev_gini = new_gini + return reward + + def visualize(self): + # Visualize the Gini coefficient over time + gini = self.datacollector.get_model_vars_dataframe() + g = sns.lineplot(data=gini) + g.set(title="Gini Coefficient over Time", ylabel="Gini Coefficient") + plt.show() + + def reset(self, *, seed=None, options=None): + if self.is_visualize: + # Visualize the Gini coefficient before resetting the model + self.visualize() + super().reset() + self.grid = mesa.space.MultiGrid(self.grid.width, self.grid.height, True) + self.schedule = mesa.time.RandomActivation(self) + for i in range(self.num_agents): + # Create MoneyAgentRL instances and add them to the schedule + a = MoneyAgentRL(i, self) + self.schedule.add(a) + x = self.random.randrange(self.grid.width) + y = self.random.randrange(self.grid.height) + self.grid.place_agent(a, (x, y)) + self.prev_gini = compute_gini(self) + return self._get_obs(), {} + + def _get_obs(self): + # The observation is the wealth of each agent and their position + obs = [] + for a in self.schedule.agents: + obs.append([a.wealth, *list(a.pos)]) + return np.array(obs) diff --git a/rl/boltzmann_money/ppo_agent.gif b/rl/boltzmann_money/ppo_agent.gif new file mode 100644 index 00000000..ac2d2520 Binary files /dev/null and b/rl/boltzmann_money/ppo_agent.gif differ diff --git a/rl/boltzmann_money/server.py b/rl/boltzmann_money/server.py new file mode 100644 index 00000000..8dbbc1e7 --- /dev/null +++ b/rl/boltzmann_money/server.py @@ -0,0 +1,69 @@ +import os + +import mesa +from mesa.visualization.ModularVisualization import ModularServer +from mesa.visualization.modules import ChartModule +from model import BoltzmannWealthModelRL +from stable_baselines3 import PPO + + +# Modify the MoneyModel class to take actions from the RL model +class MoneyModelRL(BoltzmannWealthModelRL): + def __init__(self, N, width, height): + super().__init__(N, width, height) + model_path = os.path.join( + os.path.dirname(__file__), "..", "model", "boltzmann_money.zip" + ) + self.rl_model = PPO.load(model_path) + self.reset() + + def step(self): + # Collect data + self.datacollector.collect(self) + + # Get observations which is the wealth of each agent and their position + obs = self._get_obs() + + action, _states = self.rl_model.predict(obs) + self.action_dict = action + self.schedule.step() + + +# Define the agent portrayal with different colors for different wealth levels +def agent_portrayal(agent): + if agent.wealth > 10: + color = "purple" + elif agent.wealth > 7: + color = "red" + elif agent.wealth > 5: + color = "orange" + elif agent.wealth > 3: + color = "yellow" + else: + color = "blue" + + portrayal = { + "Shape": "circle", + "Filled": "true", + 
"Layer": 0, + "Color": color, + "r": 0.5, + } + return portrayal + + +if __name__ == "__main__": + # Define a grid visualization + grid = mesa.visualization.CanvasGrid(agent_portrayal, 10, 10, 500, 500) + + # Define a chart visualization + chart = ChartModule( + [{"Label": "Gini", "Color": "Black"}], data_collector_name="datacollector" + ) + + # Create a modular server + server = ModularServer( + MoneyModelRL, [grid, chart], "Money Model", {"N": 10, "width": 10, "height": 10} + ) + server.port = 8521 # The default + server.launch() diff --git a/rl/boltzmann_money/train.py b/rl/boltzmann_money/train.py new file mode 100644 index 00000000..cd3e1c24 --- /dev/null +++ b/rl/boltzmann_money/train.py @@ -0,0 +1,35 @@ +import argparse + +from model import NUM_AGENTS, BoltzmannWealthModelRL +from stable_baselines3 import PPO +from stable_baselines3.common.callbacks import EvalCallback + + +def rl_model(args): + # Create the environment + env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS) + eval_env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS) + eval_callback = EvalCallback( + eval_env, best_model_save_path="./logs/", log_path="./logs/", eval_freq=5000 + ) + # Define the PPO model + model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="./logs/") + + # Train the model + model.learn(total_timesteps=args.stop_timesteps, callback=[eval_callback]) + + # Save the model + model.save("ppo_money_model") + + +if __name__ == "__main__": + # Define the command line arguments + parser = argparse.ArgumentParser() + parser.add_argument( + "--stop-timesteps", + type=int, + default=NUM_AGENTS * 100, + help="Number of timesteps to train.", + ) + args = parser.parse_args() + rl_model(args) diff --git a/rl/wolf_sheep/README.md b/rl/wolf_sheep/README.md new file mode 100644 index 00000000..d877fc02 --- /dev/null +++ b/rl/wolf_sheep/README.md @@ -0,0 +1,33 @@ +# Collaborative Survival: Wolf-Sheep Predation Model + +This project demonstrates the use of the RLlib library to implement Multi-Agent Reinforcement Learning (MARL) in the classic Wolf-Sheep predation problem. The environment details can be found on the Mesa project's GitHub repository [here](https://github.com/projectmesa/mesa-examples/tree/main/examples/wolf_sheep). + +## Key Features + +**RLlib and Multi-Agent Learning**: +- **Library Utilized**: The project leverages the RLlib library to concurrently train two independent PPO (Proximal Policy Optimization) agents. +- **Agents**: + - **Wolf**: Predatory agent survives by eating sheeps + - **Sheep**: Prey agent survives by eating grass + - **Grass**: Grass is eaten by sheep and regrows with time + +**Input and Observation Space**: +- **Observation Grid**: Each agent's policy receives a 10x10 grid centered on itself as input. + - **Grid Details**: The grid incorporates information about the presence of other agents (wolves, sheep, and grass) within the grid. + - **Agent's Energy Level**: The agent's current energy level is also included in the observations. + +**Action Space**: +- **Action Space**: The action space is the ID of the neighboring tile to which the agent wants to move. + +**Behavior and Training Outcomes**: +- **Optimal Behavior**: + - **Wolf**: Learns to move towards the nearest sheep. + - **Sheep**: Learns to run away from wolves and is attracted to grass. +- **Density Variations**: You can vary the densities of sheep and wolves to observe different results. 
+ +By leveraging RLlib and Multi-Agent Learning, this project provides insights into the dynamics of predator-prey relationships and optimal behavior strategies in a simulated environment. + + +

+ +
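For a quick feel of the multi-agent interface before any training, the environment defined in `model.py` can be stepped with random actions. A minimal sketch (assuming the `rl.wolf_sheep` package layout implied by the relative imports in this folder):

```python
# Random-action rollout sketch for the multi-agent environment.
from rl.wolf_sheep.model import WolfSheepRL

env = WolfSheepRL(width=20, height=20, initial_sheep=100, initial_wolves=25)
obs, _ = env.reset()  # dict keyed by agent id, e.g. "sheep_3" or "wolf_7"

for _ in range(10):
    # One discrete move per agent that is currently alive and observed.
    action_dict = {agent_id: env.action_space.sample() for agent_id in obs}
    obs, rewards, done, truncated, info = env.step(action_dict)
    if done["__all__"]:
        break
```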

\ No newline at end of file diff --git a/rl/wolf_sheep/agents.py b/rl/wolf_sheep/agents.py new file mode 100644 index 00000000..c6e5e959 --- /dev/null +++ b/rl/wolf_sheep/agents.py @@ -0,0 +1,80 @@ +from mesa_models.wolf_sheep.agents import GrassPatch, Sheep, Wolf + +from .utility import move + + +class SheepRL(Sheep): + def step(self): + """ + The code is exactly same as mesa-example with the only difference being the move function and new sheep creation class. + Link : https://github.com/projectmesa/mesa-examples/blob/main/examples/wolf_sheep/wolf_sheep/agents.py + """ + action = self.model.action_dict[self.unique_id] + move(self, action) + + living = True + + if self.model.grass: + # Reduce energy + self.energy -= 1 + + # If there is grass available, eat it + this_cell = self.model.grid.get_cell_list_contents([self.pos]) + grass_patch = next(obj for obj in this_cell if isinstance(obj, GrassPatch)) + if grass_patch.fully_grown: + self.energy += self.model.sheep_gain_from_food + grass_patch.fully_grown = False + + # Death + if self.energy < 0: + self.model.grid.remove_agent(self) + self.model.schedule.remove(self) + living = False + + if living and self.random.random() < self.model.sheep_reproduce: + # Create a new sheep: + if self.model.grass: + self.energy /= 2 + unique_id_str = f"sheep_{self.model.next_id()}" + lamb = SheepRL(unique_id_str, self.pos, self.model, self.moore, self.energy) + self.model.grid.place_agent(lamb, self.pos) + self.model.schedule.add(lamb) + + +class WolfRL(Wolf): + def step(self): + """ + The code is exactly same as mesa-example with the only difference being the move function and new wolf creation class. + Link : https://github.com/projectmesa/mesa-examples/blob/main/examples/wolf_sheep/wolf_sheep/agents.py + """ + action = self.model.action_dict[self.unique_id] + move(self, action) + + self.energy -= 1 + + # If there are sheep present, eat one + x, y = self.pos + this_cell = self.model.grid.get_cell_list_contents([self.pos]) + sheep = [obj for obj in this_cell if isinstance(obj, Sheep)] + if len(sheep) > 0: + sheep_to_eat = self.random.choice(sheep) + self.energy += self.model.wolf_gain_from_food + + # Kill the sheep + self.model.grid.remove_agent(sheep_to_eat) + self.model.schedule.remove(sheep_to_eat) + + # Death or reproduction + if self.energy < 0: + self.model.grid.remove_agent(self) + self.model.schedule.remove(self) + else: + if self.random.random() < self.model.wolf_reproduce: + # Create a new wolf cub + self.energy /= 2 + unique_id_str = f"wolf_{self.model.next_id()}" + cub = WolfRL( + unique_id_str, self.pos, self.model, self.moore, self.energy + ) + self.model.grid.place_agent(cub, cub.pos) + self.model.schedule.add(cub) diff --git a/rl/wolf_sheep/model.py b/rl/wolf_sheep/model.py new file mode 100644 index 00000000..ee580c56 --- /dev/null +++ b/rl/wolf_sheep/model.py @@ -0,0 +1,178 @@ +import gymnasium as gym +import mesa +import numpy as np +from mesa_models.wolf_sheep.agents import GrassPatch +from mesa_models.wolf_sheep.model import WolfSheep +from mesa_models.wolf_sheep.scheduler import RandomActivationByTypeFiltered +from ray.rllib.env import MultiAgentEnv + +from .agents import SheepRL, WolfRL +from .utility import create_intial_agents, grid_to_observation + + +class WolfSheepRL(WolfSheep, MultiAgentEnv): + """ + WolfRL-Sheep Predation Model + """ + + def __init__( + self, + width=20, + height=20, + initial_sheep=100, + initial_wolves=25, + sheep_reproduce=0.04, + wolf_reproduce=0.05, + wolf_gain_from_food=20, + grass=True, + 
grass_regrowth_time=30, + sheep_gain_from_food=4, + vision=4, + ): + """ + Create a new WolfRL-Sheep model with the given parameters. + """ + super().__init__( + width, + height, + initial_sheep, + initial_wolves, + sheep_reproduce, + wolf_reproduce, + wolf_gain_from_food, + grass, + grass_regrowth_time, + sheep_gain_from_food, + ) + # Defining RL specific attributes + self.vision = vision + # The observation space is a dictionary containing the grid and energy of the agent + self.observation_space = gym.spaces.Dict( + { + "grid": gym.spaces.Box( + low=0, high=1, shape=((self.vision * 2 + 1) ** 2 - 1, 3), dtype=int + ), # 3 for sheep, wolf, grass + "energy": gym.spaces.Box( + low=-1, high=np.inf, shape=(1,), dtype=np.float32 + ), + } + ) + # The action space is a discrete space with 5 actions of moving up, down, left, right + self.action_space = gym.spaces.Discrete(4) + self.max_steps = 500 + self.datacollector = mesa.DataCollector( + { + "Wolves": lambda m: m.schedule.get_type_count(WolfRL), + "Sheep": lambda m: m.schedule.get_type_count(SheepRL), + "Grass": lambda m: m.schedule.get_type_count( + GrassPatch, lambda x: x.fully_grown + ), + } + ) + + def step(self, action_dict): + self.action_dict = action_dict + self.schedule.step() + self.datacollector.collect(self) + + # Get rewards + rewards = self.cal_reward() + + # Get observations + # We convert grid to a matrix and then neighbors of each agent is extracted + grid_to_observation(self, SheepRL, WolfRL, GrassPatch) + obs = {} + for agent in self.schedule.agents: + if isinstance(agent, (SheepRL, WolfRL)): + neighbors = agent.model.grid.get_neighborhood( + agent.pos, moore=True, radius=self.vision + ) + + obs[agent.unique_id] = { + "grid": np.array( + [ + self.obs_grid[neighbor[0]][neighbor[1]] + for neighbor in neighbors + ] + ), + "energy": np.array([agent.energy]), + } + + # Either time finishes or either wolves or sheep are extinct + done = { + a.unique_id: False + for a in self.schedule.agents + if isinstance(a, (SheepRL, WolfRL)) + } + + # Check if either wolves or sheep are extinct + if ( + self.schedule.get_type_count(WolfRL) == 0 + or self.schedule.get_type_count(SheepRL) == 0 + or self.schedule.time > self.max_steps + ): + done["__all__"] = True + else: + done["__all__"] = False + + # Prepare info dictionary + truncated = { + a.unique_id: False + for a in self.schedule.agents + if isinstance(a, (SheepRL, WolfRL)) + } + truncated["__all__"] = np.all(list(truncated.values())) + + # All the agents that dies during this step are marked as done and rewarded penalty + sample = next(iter(obs.values())) + for agent_id in action_dict: + if agent_id not in rewards: + done[agent_id] = True + rewards[agent.unique_id] = -20 + truncated[agent.unique_id] = False + # generate a sample observation with 0 -1 + + obs[agent_id] = { + "grid": np.zeros_like(sample["grid"]), + "energy": np.array([-1]), + } + + return obs, rewards, done, truncated, {} + + def cal_reward(self): + rewards = {} + # Calculate rewards + # Agents are rewarded for being alive and having energy + for agent in self.schedule.agents: + if isinstance(agent, (SheepRL, WolfRL)): + if isinstance(agent, SheepRL): + rewards[agent.unique_id] = min(4, agent.energy - 4) + else: + rewards[agent.unique_id] = min(4, agent.energy / 5 - 4) + return rewards + + def reset(self, *, seed=None, options=None): + # Reset your environment here + super().reset() + self.schedule = RandomActivationByTypeFiltered(self) + self.grid = mesa.space.MultiGrid(self.width, self.height, torus=True) + 
self.current_id = 0 + create_intial_agents(self, SheepRL, WolfRL, GrassPatch) + grid_to_observation(self, SheepRL, WolfRL, GrassPatch) + obs = {} + for agent in self.schedule.agents: + if isinstance(agent, (SheepRL, WolfRL)): + neighbors = agent.model.grid.get_neighborhood( + agent.pos, moore=True, radius=self.vision + ) + + obs[agent.unique_id] = { + "grid": np.array( + [ + self.obs_grid[neighbor[0]][neighbor[1]] + for neighbor in neighbors + ] + ), + "energy": np.array([agent.energy]), + } + return obs, {} diff --git a/rl/wolf_sheep/resources/sheep.png b/rl/wolf_sheep/resources/sheep.png new file mode 100644 index 00000000..dfb81b0e Binary files /dev/null and b/rl/wolf_sheep/resources/sheep.png differ diff --git a/rl/wolf_sheep/resources/wolf.png b/rl/wolf_sheep/resources/wolf.png new file mode 100644 index 00000000..5357b855 Binary files /dev/null and b/rl/wolf_sheep/resources/wolf.png differ diff --git a/rl/wolf_sheep/resources/wolf_sheep.gif b/rl/wolf_sheep/resources/wolf_sheep.gif new file mode 100644 index 00000000..08d3e589 Binary files /dev/null and b/rl/wolf_sheep/resources/wolf_sheep.gif differ diff --git a/rl/wolf_sheep/server.py b/rl/wolf_sheep/server.py new file mode 100644 index 00000000..f21c54c0 --- /dev/null +++ b/rl/wolf_sheep/server.py @@ -0,0 +1,190 @@ +import os + +import mesa +import numpy as np +from mesa_models.wolf_sheep.agents import GrassPatch +from ray import tune +from ray.rllib.algorithms.algorithm import Algorithm + +from .agents import SheepRL, WolfRL +from .model import WolfSheepRL +from .utility import grid_to_observation + + +class WolfSheepServer(WolfSheepRL): + def __init__( + self, + width=20, + height=20, + initial_sheep=100, + initial_wolves=25, + sheep_reproduce=0.04, + wolf_reproduce=0.05, + wolf_gain_from_food=20, + grass=True, + grass_regrowth_time=30, + sheep_gain_from_food=4, + model_path=None, + ): + super().__init__( + width, + height, + initial_sheep, + initial_wolves, + sheep_reproduce, + wolf_reproduce, + wolf_gain_from_food, + grass, + grass_regrowth_time, + sheep_gain_from_food, + ) + + def env_creator(_): + return WolfSheepRL( + width, + height, + initial_sheep, + initial_wolves, + sheep_reproduce, + wolf_reproduce, + wolf_gain_from_food, + grass, + grass_regrowth_time, + sheep_gain_from_food, + ) + + tune.register_env("WorldSheepModel-v0", env_creator) + self.iteration = 0 + # Load the model from checkpoint + checkpoint_path = model_path + algo = Algorithm.from_checkpoint(checkpoint_path) + self.wolf_policy = algo.get_policy("policy_wolf") + self.sheep_policy = algo.get_policy("policy_sheep") + + def step(self): + if self.iteration == 0: + self.reset() + self.datacollector.collect(self) + # Get the observation for each agent + grid_to_observation(self, SheepRL, WolfRL, GrassPatch) + obs = {} + for agent in self.schedule.agents: + if isinstance(agent, (SheepRL, WolfRL)): + neighbors = agent.model.grid.get_neighborhood( + agent.pos, moore=True, radius=self.vision + ) + obs[agent.unique_id] = { + "grid": np.array( + [ + self.obs_grid[neighbor[0]][neighbor[1]] + for neighbor in neighbors + ] + ), + "energy": np.array([agent.energy]), + } + action_dict = {} + # Get the action for each agent + for agent in self.schedule.agents: + if isinstance(agent, SheepRL): + action_dict[agent.unique_id] = self.sheep_policy.compute_single_action( + obs[agent.unique_id], explore=False + )[0] + elif isinstance(agent, WolfRL): + action_dict[agent.unique_id] = self.wolf_policy.compute_single_action( + obs[agent.unique_id], explore=False + )[0] + 
self.action_dict = action_dict + # Take a step in the environment + self.schedule.step() + self.iteration += 1 + if ( + self.schedule.get_type_count(WolfRL) == 0 + or self.schedule.get_type_count(SheepRL) == 0 + or self.schedule.time > self.max_steps + ): + self.running = False + + +def wolf_sheep_portrayal(agent): + if agent is None: + return + + portrayal = {} + file_path = os.path.dirname(os.path.abspath(__file__)) + resources_path = os.path.join(file_path, "resources") + + if type(agent) is SheepRL: + portrayal["Shape"] = os.path.join(resources_path, "sheep.png") + portrayal["scale"] = 0.9 + portrayal["Layer"] = 1 + + elif type(agent) is WolfRL: + portrayal["Shape"] = os.path.join(resources_path, "wolf.png") + portrayal["scale"] = 0.9 + portrayal["Layer"] = 2 + portrayal["text"] = round(agent.energy, 1) + portrayal["text_color"] = "White" + + elif type(agent) is GrassPatch: + portrayal["Color"] = ( + ["#00FF00", "#00CC00", "#009900"] + if agent.fully_grown + else ["#84e184", "#adebad", "#d6f5d6"] + ) + portrayal["Shape"] = "rect" + portrayal["Filled"] = "true" + portrayal["Layer"] = 0 + portrayal["w"] = 1 + portrayal["h"] = 1 + return portrayal + + +canvas_element = mesa.visualization.CanvasGrid(wolf_sheep_portrayal, 20, 20, 500, 500) +chart_element = mesa.visualization.ChartModule( + [ + {"Label": "Wolves", "Color": "#AA0000"}, + {"Label": "Sheep", "Color": "#666666"}, + {"Label": "Grass", "Color": "#00AA00"}, + ] +) + +model_params = { + "height": 20, + "width": 20, + "model_path": None, + "title": mesa.visualization.StaticText("Parameters:"), + "grass": mesa.visualization.Checkbox("Grass Enabled", True), + "grass_regrowth_time": mesa.visualization.Slider("Grass Regrowth Time", 20, 1, 50), + "initial_sheep": mesa.visualization.Slider( + "Initial Sheep Population", 100, 10, 300 + ), + "sheep_reproduce": mesa.visualization.Slider( + "Sheep Reproduction Rate", 0.04, 0.01, 1.0, 0.01 + ), + "initial_wolves": mesa.visualization.Slider("Initial Wolf Population", 25, 10, 300), + "wolf_reproduce": mesa.visualization.Slider( + "Wolf Reproduction Rate", + 0.05, + 0.01, + 1.0, + 0.01, + description="The rate at which wolf agents reproduce.", + ), + "wolf_gain_from_food": mesa.visualization.Slider( + "Wolf Gain From Food Rate", 20, 1, 50 + ), + "sheep_gain_from_food": mesa.visualization.Slider("Sheep Gain From Food", 4, 1, 10), +} + + +def run_model(height=20, width=20, model_path=None): + model_params["height"] = height + model_params["width"] = width + model_params["model_path"] = model_path + server = mesa.visualization.ModularServer( + WolfSheepServer, + [canvas_element, chart_element], + "Wolf Sheep Predation", + model_params, + ) + return server diff --git a/rl/wolf_sheep/train_config.py b/rl/wolf_sheep/train_config.py new file mode 100644 index 00000000..f3c4fdb7 --- /dev/null +++ b/rl/wolf_sheep/train_config.py @@ -0,0 +1,45 @@ +import os + +from ray.rllib.algorithms.ppo import PPOConfig +from ray.rllib.policy.policy import PolicySpec + +from .model import WolfSheepRL + + +# Configuration to train the model +# Feel free to adjust the configuration as necessary +def env_creator(_): + return WolfSheepRL( + width=20, + height=20, + initial_sheep=100, + initial_wolves=25, + sheep_reproduce=0.04, + wolf_reproduce=0.05, + wolf_gain_from_food=20, + grass=True, + grass_regrowth_time=30, + sheep_gain_from_food=4, + ) + + +config = { + "env_name": "WorldSheepModel-v0", + "env_creator": env_creator, + "framework": "torch", # Assuming you want to use PyTorch + "train_batch_size": 150, # Assuming a 
default value, adjust as necessary + "policies": { + "policy_sheep": PolicySpec(config=PPOConfig.overrides(framework_str="torch")), + "policy_wolf": PolicySpec(config=PPOConfig.overrides(framework_str="torch")), + }, + "policy_mapping_fn": lambda agent_id, *args, **kwargs: "policy_sheep" + if agent_id[0:5] == "sheep" + else "policy_wolf", + "policies_to_train": ["policy_sheep", "policy_wolf"], + "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "1")), + "num_learners": 50, # Assuming a default value, adjust as necessary + "num_env_runners": 20, # Assuming a default value, adjust as necessary + "num_envs_per_env_runner": 1, # Assuming a default value, adjust as necessary + "batch_mode": "truncate_episodes", # Assuming a default value, adjust as necessary + "rollout_fragment_length": "auto", # Assuming a default value, adjust as necessary +} diff --git a/rl/wolf_sheep/utility.py b/rl/wolf_sheep/utility.py new file mode 100644 index 00000000..b65a49ee --- /dev/null +++ b/rl/wolf_sheep/utility.py @@ -0,0 +1,80 @@ +def create_intial_agents(self, SheepRL, WolfRL, GrassPatch): + # Create sheep: + for i in range(self.initial_sheep): + x = self.random.randrange(self.width) + y = self.random.randrange(self.height) + energy = self.random.randrange(2 * self.sheep_gain_from_food) + unique_id_str = f"sheep_{self.next_id()}" + sheep = SheepRL(unique_id_str, None, self, True, energy) + self.grid.place_agent(sheep, (x, y)) + self.schedule.add(sheep) + + # Create wolves + for i in range(self.initial_wolves): + x = self.random.randrange(self.width) + y = self.random.randrange(self.height) + energy = self.random.randrange(2 * self.wolf_gain_from_food) + unique_id_str = f"wolf_{self.next_id()}" + wolf = WolfRL(unique_id_str, None, self, True, energy) + self.grid.place_agent(wolf, (x, y)) + self.schedule.add(wolf) + + # Create grass patches + if self.grass: + for agent, (x, y) in self.grid.coord_iter(): + fully_grown = self.random.choice([True, False]) + + if fully_grown: + countdown = self.grass_regrowth_time + else: + countdown = self.random.randrange(self.grass_regrowth_time) + + unique_id_str = f"grass_{self.next_id()}" + patch = GrassPatch(unique_id_str, None, self, fully_grown, countdown) + self.grid.place_agent(patch, (x, y)) + self.schedule.add(patch) + + +def move(self, action): + empty_neighbors = self.model.grid.get_neighborhood( + self.pos, moore=True, include_center=False + ) + + # Define the movement deltas + moves = { + 0: (1, 0), # Move right + 1: (-1, 0), # Move left + 2: (0, -1), # Move up + 3: (0, 1), # Move down + } + + # Get the delta for the action, defaulting to (0, 0) if the action is invalid + dx, dy = moves.get(int(action), (0, 0)) + + # Calculate the new position and wrap around the grid + new_position = ( + (self.pos[0] + dx) % self.model.grid.width, + (self.pos[1] + dy) % self.model.grid.height, + ) + + # Move the agent if the new position is in empty_neighbors + if new_position in empty_neighbors: + self.model.grid.move_agent(self, new_position) + + +def grid_to_observation(self, SheepRL, WolfRL, GrassPatch): + # Convert grid to matrix for better representation + self.obs_grid = [] + for i in self.grid._grid: + row = [] + for j in i: + value = [0, 0, 0] + for agent in j: + if isinstance(agent, SheepRL): + value[0] = 1 + elif isinstance(agent, WolfRL): + value[1] = 1 + elif isinstance(agent, GrassPatch) and agent.fully_grown: + value[2] = 1 + row.append(value) + self.obs_grid.append(row)
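The `config` dictionary in `train_config.py` is meant to be consumed by a training script that is not part of this diff. A hedged sketch of how it might be fed into RLlib's `PPOConfig` builder (the exact builder calls and the `rl.wolf_sheep` import path are assumptions and may need adjusting for your RLlib version and package layout):

```python
# Sketch: build and run a multi-agent PPO trainer from the config dict above.
from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig

from rl.wolf_sheep.train_config import config

# Register the Mesa-backed environment under the name used in the config.
tune.register_env(config["env_name"], config["env_creator"])

ppo_config = (
    PPOConfig()
    .environment(config["env_name"])
    .framework(config["framework"])
    .multi_agent(
        policies=config["policies"],
        policy_mapping_fn=config["policy_mapping_fn"],
        policies_to_train=config["policies_to_train"],
    )
    .training(train_batch_size=config["train_batch_size"])
)

algo = ppo_config.build()
for _ in range(5):  # a few iterations just for illustration
    algo.train()
checkpoint = algo.save()  # the checkpoint path can then be passed to server.run_model(model_path=...)
```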