-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathparams.py
67 lines (54 loc) · 2.22 KB
/
params.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import mxnet as mx
class Breakout_Params(object):
    """Hyper-parameter bundle for PPO training on Atari Breakout."""

    def __init__(self):
        # Compute context (CPU by default).
        self.ctx = mx.cpu()
        self.log_interval = 1

        # Optimisation settings.
        self.lr = 2.5e-4
        self.lr_schedule = lambda frac: frac * 2.5e-4          # linearly annealed LR
        self.clip_range_schedule = lambda frac: frac * 0.1     # linearly annealed PPO clip range

        # PPO loss weighting and discounting.
        self.value_coefficient = 0.5
        self.entropy_coefficient = 0.01
        self.gamma = 0.99
        self.lam = 0.95

        # Roll-out / update sizing.
        self.nsteps = 128
        self.nminibatches = 4
        self.num_update_epochs = 4
        self.num_timesteps = int(10e6 * 1.1)

        # Epsilon-greedy annealing settings.
        self.epsilon_min = 0.1
        self.annealing_end = 1000000.
class Pendulum_Params(object):
    """Hyper-parameter bundle for PPO training on Pendulum (continuous control)."""

    def __init__(self):
        # Compute context (CPU by default).
        self.ctx = mx.cpu()

        # Separate learning rates for the actor and critic networks.
        self.actor_lr = 0.001
        self.critic_lr = 0.002

        # PPO surrogate-objective settings.
        self.clip_param = 0.2
        self.value_coefficient = 0.5
        self.entropy_coefficient = 0.01

        self.gamma = 0.9                       # Reward discount [0.9, 0.99]
        self.lam = 0.95                        # GAE lambda
        self.nsteps = 64                       # Number of steps in one roll-out
        self.num_update_steps = 10             # Gradient steps per update
        self.num_update_epochs = 10            # Number of parameter updates to do on 1 batch
        self.nenvs = 1                         # Number of parallel environments to run
        # Total number of steps to take; cast to int for consistency with
        # Breakout_Params (a float step count breaks integer-only uses).
        self.num_timesteps = int(10e6)
        self.nminibatches = 2                  # Number of batches in one roll-out to train

        # Annealing schedules: f is the remaining-progress fraction (1 -> 0).
        self.clip_range = lambda f: f * 0.1
        self.lr_schedule = lambda f: f * 2.5e-4
class CartPole_Params(object):
    """Hyper-parameter bundle for PPO training on CartPole (discrete control)."""

    def __init__(self):
        # Compute context (CPU by default).
        self.ctx = mx.cpu()

        # Separate learning rates for the actor and critic networks.
        self.actor_lr = 0.001
        self.critic_lr = 0.002

        # PPO surrogate-objective settings.
        self.clip_param = 0.2
        self.value_coefficient = 0.5
        self.entropy_coefficient = 0.01

        self.gamma = 0.99                      # Reward discount [0.9, 0.99]
        self.lam = 0.95                        # GAE lambda
        self.nsteps = 256                      # Number of steps in one roll-out
        self.num_update_steps = 10             # Gradient steps per update
        self.num_update_epochs = 10            # Number of parameter updates to do on 1 batch
        self.nenvs = 1                         # Number of parallel environments to run
        # Total number of steps to take; cast to int for consistency with
        # Breakout_Params (a float step count breaks integer-only uses).
        self.num_timesteps = int(10e6)
        self.nminibatches = 4                  # Number of batches in one roll-out to train

        # Annealing schedules: f is the remaining-progress fraction (1 -> 0).
        self.clip_range = lambda f: f * 0.1
        self.lr_schedule = lambda f: f * 2.5e-4