LucasAlegre · LucasAlegre · Jun 15, 2023 · Jun 15, 2023 · Jun 15, 2023 · Jun 15, 2023
diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml
@@ -20,9 +20,6 @@ jobs:
     strategy:
       matrix:
         include:
-        - os: ubuntu-latest
-          python: 37
-          platform: manylinux_x86_64
         - os: ubuntu-latest
           python: 38
           platform: manylinux_x86_64

diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        python-version: ['3.7', '3.8', '3.9']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python ${{ matrix.python-version }}

diff --git a/README.md b/README.md
@@ -181,29 +181,27 @@ In the folder [nets/RESCO](https://github.com/LucasAlegre/sumo-rl/tree/master/ne
 
 ### Experiments
 
-WARNING: Gym 0.26 had many breaking changes, stable-baselines3 and RLlib still do not support it, but will be updated soon. See [Stable Baselines 3 PR](https://github.com/DLR-RM/stable-baselines3/pull/780) and [RLib PR](https://github.com/ray-project/ray/pull/28369).
-Hence, only the tabular Q-learning experiment is running without errors for now.
-
 Check [experiments](https://github.com/LucasAlegre/sumo-rl/tree/master/experiments) for examples on how to instantiate an environment and train your RL agent.
 
 ### [Q-learning](https://github.com/LucasAlegre/sumo-rl/blob/master/agents/ql_agent.py) in a one-way single intersection:
 ```bash
 python experiments/ql_single-intersection.py
 ```
 
-### [RLlib A3C](https://github.com/ray-project/ray/tree/master/python/ray/rllib/agents/a3c) multiagent in a 4x4 grid:
+### [RLlib PPO](https://docs.ray.io/en/latest/_modules/ray/rllib/algorithms/ppo/ppo.html) multiagent in a 4x4 grid:
 ```bash
-python experiments/a3c_4x4grid.py
+python experiments/ppo_4x4grid.py
 ```
 
 ### [stable-baselines3 DQN](https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/dqn/dqn.py) in a 2-way single intersection:
+Note: you need to install stable-baselines3 with `pip install "stable_baselines3[extra]>=2.0.0a9"` for [Gymnasium compatibility](https://stable-baselines3.readthedocs.io/en/master/guide/install.html).
 ```bash
 python experiments/dqn_2way-single-intersection.py
 ```
 
 ### Plotting results:
 ```bash
-python outputs/plot.py -f outputs/2way-single-intersection/a3c
+python outputs/plot.py -f outputs/4x4grid/ppo_conn0_ep2
 ```
 <p align="center">
 <img src="outputs/result.png" align="center" width="50%"/>

diff --git a/experiments/a3c_4x4grid.py b/experiments/a3c_4x4grid.py
diff --git a/experiments/dqn_2way-single-intersection.py b/experiments/dqn_2way-single-intersection.py
@@ -1,7 +1,7 @@
 import os
 import sys
 
-import gym
+import gymnasium as gym
 from stable_baselines3.dqn.dqn import DQN
 
 

diff --git a/experiments/dqn_big-intersection.py b/experiments/dqn_big-intersection.py
@@ -1,7 +1,7 @@
 import os
 import sys
 
-import gym
+import gymnasium as gym
 
 
 if "SUMO_HOME" in os.environ:

diff --git a/experiments/ppo_4x4grid.py b/experiments/ppo_4x4grid.py
@@ -0,0 +1,69 @@
+import os
+import sys
+
+
+if "SUMO_HOME" in os.environ:  # the script exits in the else-branch when SUMO_HOME is unset
+    tools = os.path.join(os.environ["SUMO_HOME"], "tools")  # path to <SUMO_HOME>/tools
+    sys.path.append(tools)  # make that directory importable (presumably where `traci` is found -- confirm)
+else:
+    sys.exit("Please declare the environment variable 'SUMO_HOME'")  # abort with this message
+import numpy as np
+import pandas as pd
+import ray
+import traci
+from ray import tune
+from ray.rllib.algorithms.ppo import PPOConfig
+from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv
+from ray.tune.registry import register_env
+
+import sumo_rl
+
+
+if __name__ == "__main__":  # script entry point: configure PPO for the "4x4grid" env and launch it through Ray Tune
+    ray.init()  # start Ray with default arguments
+
+    env_name = "4x4grid"  # used as the registered env name and as the results sub-directory
+
+    register_env(  # register a factory under env_name so the config below can refer to the env by name
+        env_name,
+        lambda _: ParallelPettingZooEnv(  # the factory's argument is ignored
+            sumo_rl.parallel_env(
+                net_file="nets/4x4-Lucas/4x4.net.xml",
+                route_file="nets/4x4-Lucas/4x4c1c2c1c2.rou.xml",
+                out_csv_name="outputs/4x4grid/ppo",  # presumably the prefix of the output CSV files -- confirm
+                use_gui=False,
+                num_seconds=80000,  # presumably the simulated-seconds limit of the env -- confirm
+            )
+        ),
+    )
+
+    config = (  # PPO configuration assembled with the chained PPOConfig builder calls
+        PPOConfig()
+        .environment(env=env_name, disable_env_checking=True)  # refers to the env registered above
+        .rollouts(num_rollout_workers=4, rollout_fragment_length=128)  # 4 * 128 = 512, equal to train_batch_size below
+        .training(
+            train_batch_size=512,
+            lr=2e-5,
+            gamma=0.95,
+            lambda_=0.9,
+            use_gae=True,
+            clip_param=0.4,
+            grad_clip=None,
+            entropy_coeff=0.1,
+            vf_loss_coeff=0.25,
+            sgd_minibatch_size=64,
+            num_sgd_iter=10,
+        )
+        .debugging(log_level="ERROR")
+        .framework(framework="torch")
+        .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))  # GPU count read from RLLIB_NUM_GPUS, default 0
+    )
+
+    tune.run(
+        "PPO",
+        name="PPO",
+        stop={"timesteps_total": 100000},  # stopping criterion handed to Tune
+        checkpoint_freq=10,
+        local_dir="~/ray_results/" + env_name,  # evaluates to "~/ray_results/4x4grid"
+        config=config.to_dict(),  # the builder object is converted to a plain dict for Tune
+    )
diff --git a/pyproject.toml b/pyproject.toml
@@ -51,7 +51,7 @@ sumo_rl = [
 [tool.black]
 safe = true
 line-length = 127
-target-version = ['py37', 'py38', 'py39', 'py310']
+target-version = ['py38', 'py39', 'py310', 'py311']
 include = '\.pyi?$'
 
 [tool.isort]
@@ -69,7 +69,7 @@ exclude = ["**/node_modules", "**/__pycache__"]
 strict = []
 
 typeCheckingMode = "basic"
-pythonVersion = "3.7"
+pythonVersion = "3.8"
 pythonPlatform = "All"
 typeshedPath = "typeshed"
 enableTypeIgnoreComments = true