adding carracing envs
tpbarron committed Jun 18, 2017
1 parent e0c169a commit 4facbbc
Showing 5 changed files with 87 additions and 22 deletions.
59 changes: 58 additions & 1 deletion envs.py
@@ -1,7 +1,7 @@
 import gym
 import numpy as np
 import universe
-from gym.spaces.box import Box
+from gym.spaces import Box, Discrete
 from universe import vectorized
 from universe.wrappers import Unvectorize, Vectorize

@@ -19,6 +19,63 @@ def create_atari_env(env_id):
     return env
 
 
+def create_car_racing_env():
+    env = gym.make('CarRacing-v0')
+    env = Vectorize(env)
+    env = CarRacingRescale32x32(env)
+    env = NormalizedEnv(env)
+    env = CarRacingDiscreteActions(env)
+    env = Unvectorize(env)
+    return env
+
+
+class CarRacingDiscreteActions(vectorized.ActionWrapper):
+
+    def __init__(self, env=None):
+        super(CarRacingDiscreteActions, self).__init__(env)
+        self.action_space = Discrete(5)
+        # 0 left
+        # 1 right
+        # 2 forward
+        # 3 brake
+        # 4 noop
+
+    def _make_continuous_action(self, a):
+        # print ("a = ", a)
+        act = np.array([0., 0., 0.])
+        if a == 0: # left
+            act = np.array([-1., 0., 0.])
+        elif a == 1: # right
+            act = np.array([1., 0., 0.])
+        elif a == 2: # gas
+            act = np.array([0., 1., 0.])
+        elif a == 3: # brake
+            act = np.array([0., 0., 1.])
+        elif a == 4: # noop
+            act = np.array([0., 0., 0.])
+        # print ("act: ", act)
+        return act
+
+    def _action(self, action_n):
+        return [self._make_continuous_action(a) for a in action_n]
+
+
+class CarRacingRescale32x32(vectorized.ObservationWrapper):
+
+    def __init__(self, env=None):
+        super(CarRacingRescale32x32, self).__init__(env)
+        self.observation_space = Box(0.0, 1.0, [1, 32, 32])
+
+    def _process_frame32(self, frame):
+        frame = cv2.resize(frame, (32, 32))
+        frame = frame.mean(2)
+        frame = frame.astype(np.float32)
+        frame *= (1.0 / 255.0)
+        frame = np.reshape(frame, [1, 32, 32])
+        return frame
+
+    def _observation(self, observation_n):
+        return [self._process_frame32(obs) for obs in observation_n]
+
 def _process_frame42(frame):
     frame = frame[34:34 + 160, :160]
     # Resize by half, then down to 42x42 (essentially mipmapping). If
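Note that _process_frame32 calls cv2, which does not appear in the import hunk above; envs.py presumably already imports OpenCV for the existing _process_frame42 helper. A minimal standalone sketch of the same preprocessing (process_frame32 here is a hypothetical mirror, assuming opencv-python and numpy are installed; CarRacing-v0 frames are 96x96x3 uint8):

    import cv2
    import numpy as np

    def process_frame32(frame):
        frame = cv2.resize(frame, (32, 32))       # downsample to 32x32x3
        frame = frame.mean(2)                     # average RGB channels -> grayscale
        frame = frame.astype(np.float32) / 255.0  # scale pixel values to [0, 1]
        return np.reshape(frame, [1, 32, 32])     # 1xHxW layout for the conv stack

    fake_frame = np.random.randint(0, 256, (96, 96, 3), dtype=np.uint8)
    assert process_frame32(fake_frame).shape == (1, 32, 32)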
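With the wrappers in place, the whole pipeline can be exercised end to end. A hypothetical smoke test, assuming the 2017-era dependencies (gym with Box2D, universe) are installed:

    from envs import create_car_racing_env

    env = create_car_racing_env()
    print(env.action_space)    # Discrete(5): left/right/gas/brake/noop

    obs = env.reset()
    print(obs.shape)           # (1, 32, 32): rescaled, grayscale, normalized

    # Discrete action 2 ("gas") is translated by CarRacingDiscreteActions
    # into the continuous triple [0., 1., 0.] before reaching CarRacing-v0.
    obs, reward, done, info = env.step(2)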
9 changes: 5 additions & 4 deletions main.py
@@ -9,7 +9,7 @@
 import torch.multiprocessing as mp
 import torch.nn as nn
 import torch.nn.functional as F
-from envs import create_atari_env
+from envs import create_atari_env, create_car_racing_env
 from model import ActorCritic
 from train import train
 from test import test
@@ -41,9 +41,10 @@

 torch.manual_seed(args.seed)
 
-env = create_atari_env(args.env_name)
+# env = create_atari_env(args.env_name)
+env = create_car_racing_env()
 model = ActorCritic(env.observation_space.shape[0], env.action_space)
 
 while True:
-    train(args, model)
-    test(args, model)
+    train(args, model, env)
+    test(args, model, env)
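main.py now builds the environment once and hands the same instance to both train() and test(). Since the loop above runs the two phases sequentially in a single process, they share one simulator instead of each constructing its own, which is why the create_atari_env calls inside train.py and test.py below are commented out.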
16 changes: 10 additions & 6 deletions model.py
@@ -7,7 +7,6 @@
 import torch.nn.functional as F
 from torch.autograd import Variable
 
-
 def normalized_columns_initializer(weights, std=1.0):
     out = torch.randn(weights.size())
     out *= std / torch.sqrt(out.pow(2).sum(1).expand_as(out))
@@ -41,11 +40,14 @@ def __init__(self, num_inputs, action_space):
         self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
         self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
 
-        self.lstm = nn.LSTMCell(32 * 3 * 3, 256)
+        self.lstm_size = 64
+
+        # self.lstm = nn.LSTMCell(32 * 3 * 3, 256)
+        self.lstm = nn.LSTMCell(32 * 2 * 2, self.lstm_size)
 
         num_outputs = action_space.n
-        self.critic_linear = nn.Linear(256, 1)
-        self.actor_linear = nn.Linear(256, num_outputs)
+        self.critic_linear = nn.Linear(self.lstm_size, 1)
+        self.actor_linear = nn.Linear(self.lstm_size, num_outputs)
 
         self.apply(weights_init)
         self.actor_linear.weight.data = normalized_columns_initializer(
@@ -62,12 +64,14 @@ def __init__(self, num_inputs, action_space):

     def forward(self, inputs):
         inputs, (hx, cx) = inputs
+        # print (inputs.size())
         x = F.elu(self.conv1(inputs))
         x = F.elu(self.conv2(x))
         x = F.elu(self.conv3(x))
         x = F.elu(self.conv4(x))
 
-        x = x.view(-1, 32 * 3 * 3)
+        # print (x.size())
+        # x = x.view(-1, 32 * 3 * 3)
+        x = x.view(-1, 32 * 2 * 2)
         hx, cx = self.lstm(x, (hx, cx))
         x = hx
 
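The switch from 32 * 3 * 3 to 32 * 2 * 2 follows from the smaller input. Each of the four convolutions uses kernel 3, stride 2, padding 1, so a 32x32 CarRacing frame shrinks 32 -> 16 -> 8 -> 4 -> 2, leaving 32 channels of 2x2 (128 features) to feed the LSTM, where the 42x42 Atari frames gave 3x3. A quick check of the arithmetic:

    def conv_out(size, kernel=3, stride=2, padding=1):
        # standard convolution output-size formula
        return (size + 2 * padding - kernel) // stride + 1

    size = 32
    for _ in range(4):        # conv1..conv4 all share the same geometry
        size = conv_out(size)
    print(size)               # 2 -> flattened LSTM input is 32 * 2 * 2 = 128

    size = 42                 # the original Atari preprocessing
    for _ in range(4):
        size = conv_out(size)
    print(size)               # 3 -> hence the old 32 * 3 * 3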
12 changes: 7 additions & 5 deletions test.py
@@ -5,18 +5,19 @@
 import torch
 import torch.nn.functional as F
 import torch.optim as optim
-from envs import create_atari_env
+from envs import create_atari_env, create_car_racing_env
 from model import ActorCritic
 from torch.autograd import Variable
 from torchvision import datasets, transforms
 import time
 from collections import deque
 
 
-def test(args, model):
+def test(args, model, env):
     torch.manual_seed(args.seed)
 
-    env = create_atari_env(args.env_name)
+    # env = create_atari_env(args.env_name)
+    # env = create_car_racing_env()
     env.seed(args.seed)
 
     model = ActorCritic(env.observation_space.shape[0], env.action_space)
@@ -34,12 +35,13 @@ def test(args, model):
     actions = deque(maxlen=100)
     episode_length = 0
     while True:
+        env.render()
         episode_length += 1
         # Sync with the shared model
         if done:
             # model.load_state_dict(shared_model.state_dict())
-            cx = Variable(torch.zeros(1, 256), volatile=True)
-            hx = Variable(torch.zeros(1, 256), volatile=True)
+            cx = Variable(torch.zeros(1, model.lstm_size), volatile=True)
+            hx = Variable(torch.zeros(1, model.lstm_size), volatile=True)
         else:
             cx = Variable(cx.data, volatile=True)
             hx = Variable(hx.data, volatile=True)
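Two details worth flagging: volatile=True is the pre-0.4 PyTorch idiom for inference-only Variables (it skips building the autograd graph), and because test() still constructs a fresh model = ActorCritic(...) while the load_state_dict sync remains commented out, the rendered rollouts come from a newly initialized network rather than the model passed in from main.py.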
13 changes: 7 additions & 6 deletions train.py
@@ -5,15 +5,16 @@
 import torch
 import torch.nn.functional as F
 import torch.optim as optim
-from envs import create_atari_env
+from envs import create_atari_env, create_car_racing_env
 from model import ActorCritic
 from torch.autograd import Variable
 from torchvision import datasets, transforms
 
-def train(args, model, optimizer=None):
+def train(args, model, env, optimizer=None):
     torch.manual_seed(args.seed)
 
-    env = create_atari_env(args.env_name)
+    # env = create_atari_env(args.env_name)
+    # env = create_car_racing_env()
     print ("env: ", env.observation_space.shape, env.action_space)
     env.seed(args.seed)
 
@@ -32,13 +33,13 @@ def train(args, model, optimizer=None):
     episode_length = 0
     u = 0
     while u < args.num_updates:
-        # print ("update: ", u)
+        print ("update: ", u)
         episode_length += 1
         # Sync with the shared model
         # model.load_state_dict(shared_model.state_dict())
         if done:
-            cx = Variable(torch.zeros(1, 256))
-            hx = Variable(torch.zeros(1, 256))
+            cx = Variable(torch.zeros(1, model.lstm_size))
+            hx = Variable(torch.zeros(1, model.lstm_size))
         else:
             cx = Variable(cx.data)
             hx = Variable(hx.data)
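Like test(), train() now sizes its recurrent state from model.lstm_size, so the 64-unit LSTM configured in model.py propagates to both loops without further edits.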
