diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml
index 68054a62..1f147fe1 100644
--- a/.github/workflows/build-publish.yml
+++ b/.github/workflows/build-publish.yml
@@ -20,9 +20,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - os: ubuntu-latest
-            python: 37
-            platform: manylinux_x86_64
           - os: ubuntu-latest
            python: 38
            platform: manylinux_x86_64
diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml
index 1ee44cfe..46f02b26 100644
--- a/.github/workflows/linux-test.yml
+++ b/.github/workflows/linux-test.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        python-version: ['3.7', '3.8', '3.9']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
diff --git a/README.md b/README.md
index 373e482d..d962ceaf 100755
--- a/README.md
+++ b/README.md
@@ -181,9 +181,6 @@ In the folder [nets/RESCO](https://github.com/LucasAlegre/sumo-rl/tree/master/ne
 
 ### Experiments
 
-WARNING: Gym 0.26 had many breaking changes, stable-baselines3 and RLlib still do not support it, but will be updated soon. See [Stable Baselines 3 PR](https://github.com/DLR-RM/stable-baselines3/pull/780) and [RLib PR](https://github.com/ray-project/ray/pull/28369).
-Hence, only the tabular Q-learning experiment is running without errors for now.
-
 Check [experiments](https://github.com/LucasAlegre/sumo-rl/tree/master/experiments) for examples on how to instantiate an environment and train your RL agent.
 
 ### [Q-learning](https://github.com/LucasAlegre/sumo-rl/blob/master/agents/ql_agent.py) in a one-way single intersection:
@@ -191,19 +188,20 @@ Check [experiments](https://github.com/LucasAlegre/sumo-rl/tree/master/experimen
 python experiments/ql_single-intersection.py
 ```
 
-### [RLlib A3C](https://github.com/ray-project/ray/tree/master/python/ray/rllib/agents/a3c) multiagent in a 4x4 grid:
+### [RLlib PPO](https://docs.ray.io/en/latest/_modules/ray/rllib/algorithms/ppo/ppo.html) multiagent in a 4x4 grid:
 ```bash
-python experiments/a3c_4x4grid.py
+python experiments/ppo_4x4grid.py
 ```
 
 ### [stable-baselines3 DQN](https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/dqn/dqn.py) in a 2-way single intersection:
+Obs: you need to install stable-baselines3 with ```pip install "stable_baselines3[extra]>=2.0.0a9"``` for [Gymnasium compatibility](https://stable-baselines3.readthedocs.io/en/master/guide/install.html).
 ```bash
 python experiments/dqn_2way-single-intersection.py
 ```
 
 ### Plotting results:
 ```bash
-python outputs/plot.py -f outputs/2way-single-intersection/a3c
+python outputs/plot.py -f outputs/4x4grid/ppo_conn0_ep2
 ```
diff --git a/experiments/a3c_4x4grid.py b/experiments/a3c_4x4grid.py
deleted file mode 100755
index c655a073..00000000
--- a/experiments/a3c_4x4grid.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import os
-import sys
-
-
-if "SUMO_HOME" in os.environ:
-    tools = os.path.join(os.environ["SUMO_HOME"], "tools")
-    sys.path.append(tools)
-else:
-    sys.exit("Please declare the environment variable 'SUMO_HOME'")
-import numpy as np
-import pandas as pd
-import ray
-import traci
-from gym import spaces
-from ray.rllib.agents.a3c.a3c import A3CTrainer
-from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy
-from ray.rllib.env import PettingZooEnv
-from ray.tune.registry import register_env
-
-import sumo_rl
-
-
-if __name__ == "__main__":
-    ray.init()
-
-    register_env(
-        "4x4grid",
-        lambda _: PettingZooEnv(
-            sumo_rl.env(
-                net_file="nets/4x4-Lucas/4x4.net.xml",
-                route_file="nets/4x4-Lucas/4x4c1c2c1c2.rou.xml",
-                out_csv_name="outputs/4x4grid/a3c",
-                use_gui=False,
-                num_seconds=80000,
-            )
-        ),
-    )
-
-    trainer = A3CTrainer(
-        env="4x4grid",
-        config={
-            "multiagent": {
-                "policies": {"0": (A3CTFPolicy, spaces.Box(low=np.zeros(11), high=np.ones(11)), spaces.Discrete(2), {})},
-                "policy_mapping_fn": (lambda id: "0"),  # Traffic lights are always controlled by this policy
-            },
-            "lr": 0.001,
-            "no_done_at_end": True,
-        },
-    )
-    while True:
-        print(trainer.train())  # distributed training step
diff --git a/experiments/dqn_2way-single-intersection.py b/experiments/dqn_2way-single-intersection.py
index 24c59dff..0e192554 100755
--- a/experiments/dqn_2way-single-intersection.py
+++ b/experiments/dqn_2way-single-intersection.py
@@ -1,7 +1,7 @@
import os
import sys
-import gym
+import gymnasium as gym
from stable_baselines3.dqn.dqn import DQN
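
For context, here is a minimal sketch of what the single-agent DQN experiment looks like once `gym` is swapped for Gymnasium and stable-baselines3 >= 2.0.0a9 is installed, as noted in the README hunk above. The network/route paths and hyperparameters are illustrative placeholders, not values taken from this diff:

```python
import os
import sys

if "SUMO_HOME" in os.environ:
    sys.path.append(os.path.join(os.environ["SUMO_HOME"], "tools"))
else:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")

from stable_baselines3.dqn.dqn import DQN

from sumo_rl import SumoEnvironment

# Single-agent SUMO environment; with the Gymnasium API, stable-baselines3
# >= 2.0.0a9 can consume it directly without extra wrappers.
env = SumoEnvironment(
    net_file="nets/2way-single-intersection/single-intersection.net.xml",  # illustrative path
    route_file="nets/2way-single-intersection/single-intersection-vhvh.rou.xml",  # illustrative path
    out_csv_name="outputs/2way-single-intersection/dqn",
    single_agent=True,
    use_gui=False,
    num_seconds=100000,
)

# Illustrative DQN hyperparameters, not the ones used in the experiment script.
model = DQN(
    env=env,
    policy="MlpPolicy",
    learning_rate=1e-3,
    learning_starts=0,
    train_freq=1,
    target_update_interval=500,
    exploration_fraction=0.05,
    exploration_final_eps=0.01,
    verbose=1,
)
model.learn(total_timesteps=100000)
```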
diff --git a/experiments/dqn_big-intersection.py b/experiments/dqn_big-intersection.py
index 5ef02f87..99db609e 100644
--- a/experiments/dqn_big-intersection.py
+++ b/experiments/dqn_big-intersection.py
@@ -1,7 +1,7 @@
import os
import sys
-import gym
+import gymnasium as gym
if "SUMO_HOME" in os.environ:
diff --git a/experiments/ppo_4x4grid.py b/experiments/ppo_4x4grid.py
new file mode 100755
index 00000000..a088507e
--- /dev/null
+++ b/experiments/ppo_4x4grid.py
@@ -0,0 +1,69 @@
+import os
+import sys
+
+
+if "SUMO_HOME" in os.environ:
+    tools = os.path.join(os.environ["SUMO_HOME"], "tools")
+    sys.path.append(tools)
+else:
+    sys.exit("Please declare the environment variable 'SUMO_HOME'")
+import numpy as np
+import pandas as pd
+import ray
+import traci
+from ray import tune
+from ray.rllib.algorithms.ppo import PPOConfig
+from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv
+from ray.tune.registry import register_env
+
+import sumo_rl
+
+
+if __name__ == "__main__":
+    ray.init()
+
+    env_name = "4x4grid"
+
+    register_env(
+        env_name,
+        lambda _: ParallelPettingZooEnv(
+            sumo_rl.parallel_env(
+                net_file="nets/4x4-Lucas/4x4.net.xml",
+                route_file="nets/4x4-Lucas/4x4c1c2c1c2.rou.xml",
+                out_csv_name="outputs/4x4grid/ppo",
+                use_gui=False,
+                num_seconds=80000,
+            )
+        ),
+    )
+
+    config = (
+        PPOConfig()
+        .environment(env=env_name, disable_env_checking=True)
+        .rollouts(num_rollout_workers=4, rollout_fragment_length=128)
+        .training(
+            train_batch_size=512,
+            lr=2e-5,
+            gamma=0.95,
+            lambda_=0.9,
+            use_gae=True,
+            clip_param=0.4,
+            grad_clip=None,
+            entropy_coeff=0.1,
+            vf_loss_coeff=0.25,
+            sgd_minibatch_size=64,
+            num_sgd_iter=10,
+        )
+        .debugging(log_level="ERROR")
+        .framework(framework="torch")
+        .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
+    )
+
+    tune.run(
+        "PPO",
+        name="PPO",
+        stop={"timesteps_total": 100000},
+        checkpoint_freq=10,
+        local_dir="~/ray_results/" + env_name,
+        config=config.to_dict(),
+    )
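
Before launching the full `tune.run` job above, it can be useful to smoke-test the PettingZoo parallel environment on its own with random actions. A minimal sketch, assuming a PettingZoo release whose parallel `reset()` returns both observations and infos; the short `num_seconds` is only for the check:

```python
import sumo_rl

# Sanity check of the multi-agent environment outside of RLlib.
env = sumo_rl.parallel_env(
    net_file="nets/4x4-Lucas/4x4.net.xml",
    route_file="nets/4x4-Lucas/4x4c1c2c1c2.rou.xml",
    use_gui=False,
    num_seconds=3600,  # short horizon, just for the smoke test
)

observations, infos = env.reset()
while env.agents:
    # One random phase choice per traffic signal still active in the episode.
    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
    observations, rewards, terminations, truncations, infos = env.step(actions)
env.close()
```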
diff --git a/pyproject.toml b/pyproject.toml
index f4a047f9..205ffdd0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,7 @@ sumo_rl = [
[tool.black]
safe = true
line-length = 127
-target-version = ['py37', 'py38', 'py39', 'py310']
+target-version = ['py38', 'py39', 'py310', 'py311']
include = '\.pyi?$'
[tool.isort]
@@ -69,7 +69,7 @@ exclude = ["**/node_modules", "**/__pycache__"]
strict = []
typeCheckingMode = "basic"
-pythonVersion = "3.7"
+pythonVersion = "3.8"
pythonPlatform = "All"
typeshedPath = "typeshed"
enableTypeIgnoreComments = true