-
Notifications
You must be signed in to change notification settings - Fork 87
/
Copy pathconfig.py
48 lines (35 loc) · 1.13 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import numpy as np
import random
# Environment names, kept as two separate lists so the training and testing
# sets can be changed independently (presumably read by the training and
# evaluation scripts -- the consumers are not part of this file).
train_envs = ["car_racing"]
test_envs = ["car_racing"]
def generate_data_action(t, env):
    """Choose the action for step ``t`` of a random data-collection rollout.

    For the car racing example it is important to give the car a 'push'
    early in the exploration so that it can find different examples of
    curved track, so the first 20 steps always return ``[-0.1, 1, 0]``.
    From step 20 onwards a single roll of ``random.randint(0, 9)`` selects
    one of five simple actions:

    * 0       -> do nothing   ``[0, 0, 0]``
    * 1 to 4  -> accelerate   ``[0, u, 0]``
    * 5 to 6  -> left         ``[-u, 0, 0]``
    * 7 to 8  -> right        ``[u, 0, 0]``
    * 9       -> brake        ``[0, 0, u]``

    where ``u`` is a fresh ``random.random()`` draw.

    Args:
        t: Step index within the current episode.
        env: Object exposing ``action_space.sample()``; only touched when
            ``t >= 20``.

    Returns:
        A length-3 numpy array holding the chosen action.
    """
    if t < 20:
        return np.array([-0.1, 1, 0])

    # NOTE(review): the roll below is assumed to belong to the ``t >= 20``
    # path only, otherwise the early push would always be replaced -- confirm.
    # The sampled action is the fallback for a roll outside 0-9, which
    # randint(0, 9) never produces; the call is kept so that any side effect
    # of sampling stays the same.
    # To get truly random actions, return this sample directly.
    action = env.action_space.sample()

    roll = random.randint(0, 9)
    if roll == 0:  # do nothing
        action = np.array([0, 0, 0])
    elif 1 <= roll <= 4:  # accelerate
        action = np.array([0, random.random(), 0])
    elif 5 <= roll <= 6:  # left
        action = np.array([-random.random(), 0, 0])
    elif 7 <= roll <= 8:  # right
        action = np.array([random.random(), 0, 0])
    elif roll == 9:  # brake
        action = np.array([0, 0, random.random()])

    return action
def adjust_obs(obs):
    """Return ``obs`` converted to float32 and divided by 255.

    Args:
        obs: Array-like object providing ``astype`` (e.g. a numpy array).
            Presumably an 8-bit image frame, given the 255 divisor -- the
            caller is not visible here.

    Returns:
        A new float32 array; ``obs`` itself is not modified.
    """
    # Disabled variant: remap zero-valued entries to 255 before scaling.
    # obs[obs==0] = 255
    as_float = obs.astype('float32')
    return as_float / 255.0
def adjust_reward(reward):
    """Binarise a reward: 1 for any strictly positive value, otherwise 0.

    Args:
        reward: Scalar reward value.

    Returns:
        ``1`` if ``reward > 0``, else ``0`` (zero and negatives both map to 0).
    """
    return 1 if reward > 0 else 0