diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml
index 68054a62..1f147fe1 100644
--- a/.github/workflows/build-publish.yml
+++ b/.github/workflows/build-publish.yml
@@ -20,9 +20,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - os: ubuntu-latest
-            python: 37
-            platform: manylinux_x86_64
           - os: ubuntu-latest
             python: 38
             platform: manylinux_x86_64
diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml
index 1ee44cfe..46f02b26 100644
--- a/.github/workflows/linux-test.yml
+++ b/.github/workflows/linux-test.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        python-version: ['3.7', '3.8', '3.9']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python ${{ matrix.python-version }}
diff --git a/README.md b/README.md
index 373e482d..d962ceaf 100755
--- a/README.md
+++ b/README.md
@@ -181,9 +181,6 @@ In the folder [nets/RESCO](https://github.com/LucasAlegre/sumo-rl/tree/master/ne
 
 ### Experiments
 
-WARNING: Gym 0.26 had many breaking changes, stable-baselines3 and RLlib still do not support it, but will be updated soon. See [Stable Baselines 3 PR](https://github.com/DLR-RM/stable-baselines3/pull/780) and [RLib PR](https://github.com/ray-project/ray/pull/28369).
-Hence, only the tabular Q-learning experiment is running without errors for now.
-
 Check [experiments](https://github.com/LucasAlegre/sumo-rl/tree/master/experiments) for examples on how to instantiate an environment and train your RL agent.
 
 ### [Q-learning](https://github.com/LucasAlegre/sumo-rl/blob/master/agents/ql_agent.py) in a one-way single intersection:
@@ -191,19 +188,20 @@ Check [experiments](https://github.com/LucasAlegre/sumo-rl/tree/master/experimen
 python experiments/ql_single-intersection.py
 ```
 
-### [RLlib A3C](https://github.com/ray-project/ray/tree/master/python/ray/rllib/agents/a3c) multiagent in a 4x4 grid:
+### [RLlib PPO](https://docs.ray.io/en/latest/_modules/ray/rllib/algorithms/ppo/ppo.html) multiagent in a 4x4 grid:
 ```bash
-python experiments/a3c_4x4grid.py
+python experiments/ppo_4x4grid.py
 ```
 
 ### [stable-baselines3 DQN](https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/dqn/dqn.py) in a 2-way single intersection:
+Obs: you need to install stable-baselines3 with ```pip install "stable_baselines3[extra]>=2.0.0a9"``` for [Gymnasium compatibility](https://stable-baselines3.readthedocs.io/en/master/guide/install.html).
 ```bash
 python experiments/dqn_2way-single-intersection.py
 ```
 
 ### Plotting results:
 ```bash
-python outputs/plot.py -f outputs/2way-single-intersection/a3c
+python outputs/plot.py -f outputs/4x4grid/ppo_conn0_ep2
 ```
diff --git a/experiments/a3c_4x4grid.py b/experiments/a3c_4x4grid.py
deleted file mode 100755
index c655a073..00000000
--- a/experiments/a3c_4x4grid.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import os
-import sys
-
-
-if "SUMO_HOME" in os.environ:
-    tools = os.path.join(os.environ["SUMO_HOME"], "tools")
-    sys.path.append(tools)
-else:
-    sys.exit("Please declare the environment variable 'SUMO_HOME'")
-import numpy as np
-import pandas as pd
-import ray
-import traci
-from gym import spaces
-from ray.rllib.agents.a3c.a3c import A3CTrainer
-from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy
-from ray.rllib.env import PettingZooEnv
-from ray.tune.registry import register_env
-
-import sumo_rl
-
-
-if __name__ == "__main__":
-    ray.init()
-
-    register_env(
-        "4x4grid",
-        lambda _: PettingZooEnv(
-            sumo_rl.env(
-                net_file="nets/4x4-Lucas/4x4.net.xml",
-                route_file="nets/4x4-Lucas/4x4c1c2c1c2.rou.xml",
-                out_csv_name="outputs/4x4grid/a3c",
-                use_gui=False,
-                num_seconds=80000,
-            )
-        ),
-    )
-
-    trainer = A3CTrainer(
-        env="4x4grid",
-        config={
-            "multiagent": {
-                "policies": {"0": (A3CTFPolicy, spaces.Box(low=np.zeros(11), high=np.ones(11)), spaces.Discrete(2), {})},
-                "policy_mapping_fn": (lambda id: "0"),  # Traffic lights are always controlled by this policy
-            },
-            "lr": 0.001,
-            "no_done_at_end": True,
-        },
-    )
-    while True:
-        print(trainer.train())  # distributed training step
diff --git a/experiments/dqn_2way-single-intersection.py b/experiments/dqn_2way-single-intersection.py
index 24c59dff..0e192554 100755
--- a/experiments/dqn_2way-single-intersection.py
+++ b/experiments/dqn_2way-single-intersection.py
@@ -1,7 +1,7 @@
 import os
 import sys
 
-import gym
+import gymnasium as gym
 
 from stable_baselines3.dqn.dqn import DQN
 
diff --git a/experiments/dqn_big-intersection.py b/experiments/dqn_big-intersection.py
index 5ef02f87..99db609e 100644
--- a/experiments/dqn_big-intersection.py
+++ b/experiments/dqn_big-intersection.py
@@ -1,7 +1,7 @@
 import os
 import sys
 
-import gym
+import gymnasium as gym
 
 
 if "SUMO_HOME" in os.environ:
diff --git a/experiments/ppo_4x4grid.py b/experiments/ppo_4x4grid.py
new file mode 100755
index 00000000..a088507e
--- /dev/null
+++ b/experiments/ppo_4x4grid.py
@@ -0,0 +1,69 @@
+import os
+import sys
+
+
+if "SUMO_HOME" in os.environ:
+    tools = os.path.join(os.environ["SUMO_HOME"], "tools")
+    sys.path.append(tools)
+else:
+    sys.exit("Please declare the environment variable 'SUMO_HOME'")
+import numpy as np
+import pandas as pd
+import ray
+import traci
+from ray import tune
+from ray.rllib.algorithms.ppo import PPOConfig
+from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv
+from ray.tune.registry import register_env
+
+import sumo_rl
+
+
+if __name__ == "__main__":
+    ray.init()
+
+    env_name = "4x4grid"
+
+    register_env(
+        env_name,
+        lambda _: ParallelPettingZooEnv(
+            sumo_rl.parallel_env(
+                net_file="nets/4x4-Lucas/4x4.net.xml",
+                route_file="nets/4x4-Lucas/4x4c1c2c1c2.rou.xml",
+                out_csv_name="outputs/4x4grid/ppo",
+                use_gui=False,
+                num_seconds=80000,
+            )
+        ),
+    )
+
+    config = (
+        PPOConfig()
+        .environment(env=env_name, disable_env_checking=True)
+        .rollouts(num_rollout_workers=4, rollout_fragment_length=128)
+        .training(
+            train_batch_size=512,
+            lr=2e-5,
+            gamma=0.95,
+            lambda_=0.9,
+            use_gae=True,
+            clip_param=0.4,
+            grad_clip=None,
+            entropy_coeff=0.1,
+            vf_loss_coeff=0.25,
+            sgd_minibatch_size=64,
+            num_sgd_iter=10,
+        )
+        .debugging(log_level="ERROR")
+        .framework(framework="torch")
+        .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
+    )
+
+    tune.run(
+        "PPO",
+        name="PPO",
+        stop={"timesteps_total": 100000},
+        checkpoint_freq=10,
+        local_dir="~/ray_results/" + env_name,
+        config=config.to_dict(),
+    )
diff --git a/pyproject.toml b/pyproject.toml
index f4a047f9..205ffdd0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,7 @@ sumo_rl = [
 [tool.black]
 safe = true
 line-length = 127
-target-version = ['py37', 'py38', 'py39', 'py310']
+target-version = ['py38', 'py39', 'py310', 'py311']
 include = '\.pyi?$'
 
 [tool.isort]
@@ -69,7 +69,7 @@ exclude = ["**/node_modules", "**/__pycache__"]
 
 strict = []
 typeCheckingMode = "basic"
-pythonVersion = "3.7"
+pythonVersion = "3.8"
 pythonPlatform = "All"
 typeshedPath = "typeshed"
 enableTypeIgnoreComments = true