adding carracing envs
tpbarron committed Jun 18, 2017
1 parent e0c169a commit 4facbbc
Showing 5 changed files with 87 additions and 22 deletions.
59 changes: 58 additions & 1 deletion envs.py
@@ -1,7 +1,7 @@
 import gym
 import numpy as np
 import universe
-from gym.spaces.box import Box
+from gym.spaces import Box, Discrete
 from universe import vectorized
 from universe.wrappers import Unvectorize, Vectorize

@@ -19,6 +19,63 @@ def create_atari_env(env_id):
     return env
 
 
+def create_car_racing_env():
+    env = gym.make('CarRacing-v0')
+    env = Vectorize(env)
+    env = CarRacingRescale32x32(env)
+    env = NormalizedEnv(env)
+    env = CarRacingDiscreteActions(env)
+    env = Unvectorize(env)
+    return env
+
+
+class CarRacingDiscreteActions(vectorized.ActionWrapper):
+
+    def __init__(self, env=None):
+        super(CarRacingDiscreteActions, self).__init__(env)
+        self.action_space = Discrete(5)
+        # 0 left
+        # 1 right
+        # 2 forward
+        # 3 brake
+        # 4 noop
+
+    def _make_continuous_action(self, a):
+        # print ("a = ", a)
+        act = np.array([0., 0., 0.])
+        if a == 0: # left
+            act = np.array([-1., 0., 0.])
+        elif a == 1: # right
+            act = np.array([1., 0., 0.])
+        elif a == 2: # gas
+            act = np.array([0., 1., 0.])
+        elif a == 3: # brake
+            act = np.array([0., 0., 1.])
+        elif a == 4: # noop
+            act = np.array([0., 0., 0.])
+        # print ("act: ", act)
+        return act
+
+    def _action(self, action_n):
+        return [self._make_continuous_action(a) for a in action_n]
+
+
+class CarRacingRescale32x32(vectorized.ObservationWrapper):
+
+    def __init__(self, env=None):
+        super(CarRacingRescale32x32, self).__init__(env)
+        self.observation_space = Box(0.0, 1.0, [1, 32, 32])
+
+    def _process_frame32(self, frame):
+        frame = cv2.resize(frame, (32, 32))
+        frame = frame.mean(2)
+        frame = frame.astype(np.float32)
+        frame *= (1.0 / 255.0)
+        frame = np.reshape(frame, [1, 32, 32])
+        return frame
+
+    def _observation(self, observation_n):
+        return [self._process_frame32(obs) for obs in observation_n]
+
 def _process_frame42(frame):
     frame = frame[34:34 + 160, :160]
     # Resize by half, then down to 42x42 (essentially mipmapping). If
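Note that _process_frame32 calls cv2, which does not appear in the import hunk above; envs.py presumably already imports OpenCV for the existing _process_frame42 helper. A minimal standalone sketch of the same preprocessing (process_frame32 here is a hypothetical mirror, assuming opencv-python and numpy are installed; CarRacing-v0 frames are 96x96x3 uint8):

    import cv2
    import numpy as np

    def process_frame32(frame):
        frame = cv2.resize(frame, (32, 32))       # downsample to 32x32x3
        frame = frame.mean(2)                     # average RGB channels -> grayscale
        frame = frame.astype(np.float32) / 255.0  # scale pixel values to [0, 1]
        return np.reshape(frame, [1, 32, 32])     # 1xHxW layout for the conv stack

    fake_frame = np.random.randint(0, 256, (96, 96, 3), dtype=np.uint8)
    assert process_frame32(fake_frame).shape == (1, 32, 32)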
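With the wrappers in place, the whole pipeline can be exercised end to end. A hypothetical smoke test, assuming the 2017-era dependencies (gym with Box2D, universe) are installed:

    from envs import create_car_racing_env

    env = create_car_racing_env()
    print(env.action_space)    # Discrete(5): left/right/gas/brake/noop

    obs = env.reset()
    print(obs.shape)           # (1, 32, 32): rescaled, grayscale, normalized

    # Discrete action 2 ("gas") is translated by CarRacingDiscreteActions
    # into the continuous triple [0., 1., 0.] before reaching CarRacing-v0.
    obs, reward, done, info = env.step(2)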
9 changes: 5 additions & 4 deletions main.py
@@ -9,7 +9,7 @@
 import torch.multiprocessing as mp
 import torch.nn as nn
 import torch.nn.functional as F
-from envs import create_atari_env
+from envs import create_atari_env, create_car_racing_env
 from model import ActorCritic
 from train import train
 from test import test
@@ -41,9 +41,10 @@

 torch.manual_seed(args.seed)
 
-env = create_atari_env(args.env_name)
+# env = create_atari_env(args.env_name)
+env = create_car_racing_env()
 model = ActorCritic(env.observation_space.shape[0], env.action_space)
 
 while True:
-    train(args, model)
-    test(args, model)
+    train(args, model, env)
+    test(args, model, env)
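main.py now builds the environment once and hands the same instance to both train() and test(). Since the loop above runs the two phases sequentially in a single process, they share one simulator instead of each constructing its own, which is why the create_atari_env calls inside train.py and test.py below are commented out.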
16 changes: 10 additions & 6 deletions model.py
@@ -7,7 +7,6 @@
 import torch.nn.functional as F
 from torch.autograd import Variable
 
-
 def normalized_columns_initializer(weights, std=1.0):
     out = torch.randn(weights.size())
     out *= std / torch.sqrt(out.pow(2).sum(1).expand_as(out))
@@ -41,11 +40,14 @@ def __init__(self, num_inputs, action_space):
         self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
         self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
 
-        self.lstm = nn.LSTMCell(32 * 3 * 3, 256)
+        self.lstm_size = 64
+
+        # self.lstm = nn.LSTMCell(32 * 3 * 3, 256)
+        self.lstm = nn.LSTMCell(32 * 2 * 2, self.lstm_size)
 
         num_outputs = action_space.n
-        self.critic_linear = nn.Linear(256, 1)
-        self.actor_linear = nn.Linear(256, num_outputs)
+        self.critic_linear = nn.Linear(self.lstm_size, 1)
+        self.actor_linear = nn.Linear(self.lstm_size, num_outputs)
 
         self.apply(weights_init)
         self.actor_linear.weight.data = normalized_columns_initializer(
@@ -62,12 +64,14 @@ def __init__(self, num_inputs, action_space):

     def forward(self, inputs):
         inputs, (hx, cx) = inputs
+        # print (inputs.size())
         x = F.elu(self.conv1(inputs))
         x = F.elu(self.conv2(x))
         x = F.elu(self.conv3(x))
         x = F.elu(self.conv4(x))
 
-        x = x.view(-1, 32 * 3 * 3)
+        # print (x.size())
+        # x = x.view(-1, 32 * 3 * 3)
+        x = x.view(-1, 32 * 2 * 2)
         hx, cx = self.lstm(x, (hx, cx))
         x = hx
 
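The switch from 32 * 3 * 3 to 32 * 2 * 2 follows from the smaller input. Each of the four convolutions uses kernel 3, stride 2, padding 1, so a 32x32 CarRacing frame shrinks 32 -> 16 -> 8 -> 4 -> 2, leaving 32 channels of 2x2 (128 features) to feed the LSTM, where the 42x42 Atari frames gave 3x3. A quick check of the arithmetic:

    def conv_out(size, kernel=3, stride=2, padding=1):
        # standard convolution output-size formula
        return (size + 2 * padding - kernel) // stride + 1

    size = 32
    for _ in range(4):        # conv1..conv4 all share the same geometry
        size = conv_out(size)
    print(size)               # 2 -> flattened LSTM input is 32 * 2 * 2 = 128

    size = 42                 # the original Atari preprocessing
    for _ in range(4):
        size = conv_out(size)
    print(size)               # 3 -> hence the old 32 * 3 * 3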
12 changes: 7 additions & 5 deletions test.py
@@ -5,18 +5,19 @@
 import torch
 import torch.nn.functional as F
 import torch.optim as optim
-from envs import create_atari_env
+from envs import create_atari_env, create_car_racing_env
 from model import ActorCritic
 from torch.autograd import Variable
 from torchvision import datasets, transforms
 import time
 from collections import deque
 
 
-def test(args, model):
+def test(args, model, env):
     torch.manual_seed(args.seed)
 
-    env = create_atari_env(args.env_name)
+    # env = create_atari_env(args.env_name)
+    # env = create_car_racing_env()
     env.seed(args.seed)
 
     model = ActorCritic(env.observation_space.shape[0], env.action_space)
@@ -34,12 +35,13 @@ def test(args, model):
     actions = deque(maxlen=100)
     episode_length = 0
     while True:
+        env.render()
         episode_length += 1
         # Sync with the shared model
         if done:
             # model.load_state_dict(shared_model.state_dict())
-            cx = Variable(torch.zeros(1, 256), volatile=True)
-            hx = Variable(torch.zeros(1, 256), volatile=True)
+            cx = Variable(torch.zeros(1, model.lstm_size), volatile=True)
+            hx = Variable(torch.zeros(1, model.lstm_size), volatile=True)
         else:
             cx = Variable(cx.data, volatile=True)
             hx = Variable(hx.data, volatile=True)
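Two details worth flagging: volatile=True is the pre-0.4 PyTorch idiom for inference-only Variables (it skips building the autograd graph), and because test() still constructs a fresh model = ActorCritic(...) while the load_state_dict sync remains commented out, the rendered rollouts come from a newly initialized network rather than the model passed in from main.py.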
13 changes: 7 additions & 6 deletions train.py
@@ -5,15 +5,16 @@
 import torch
 import torch.nn.functional as F
 import torch.optim as optim
-from envs import create_atari_env
+from envs import create_atari_env, create_car_racing_env
 from model import ActorCritic
 from torch.autograd import Variable
 from torchvision import datasets, transforms
 
-def train(args, model, optimizer=None):
+def train(args, model, env, optimizer=None):
     torch.manual_seed(args.seed)
 
-    env = create_atari_env(args.env_name)
+    # env = create_atari_env(args.env_name)
+    # env = create_car_racing_env()
     print ("env: ", env.observation_space.shape, env.action_space)
     env.seed(args.seed)
 
@@ -32,13 +33,13 @@ def train(args, model, optimizer=None):
     episode_length = 0
     u = 0
     while u < args.num_updates:
-        # print ("update: ", u)
+        print ("update: ", u)
         episode_length += 1
         # Sync with the shared model
         # model.load_state_dict(shared_model.state_dict())
         if done:
-            cx = Variable(torch.zeros(1, 256))
-            hx = Variable(torch.zeros(1, 256))
+            cx = Variable(torch.zeros(1, model.lstm_size))
+            hx = Variable(torch.zeros(1, model.lstm_size))
         else:
             cx = Variable(cx.data)
             hx = Variable(hx.data)
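Like test(), train() now sizes its recurrent state from model.lstm_size, so the 64-unit LSTM configured in model.py propagates to both loops without further edits.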
