-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathutils.py
93 lines (73 loc) · 2.65 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import numpy as np
import random
import mxnet as mx
@mx.init.register
class FaninInit(mx.init.Initializer):
    """Uniform initializer whose range is set by the leading weight dimension.

    Weights are sampled from U(-v, v) with v = 1 / sqrt(arr.shape[0]);
    biases are set to zero.

    Modelled on this reference implementation:

        def fanin_init(size, fanin=None):
            fanin = fanin or size[0]
            v = 1. / np.sqrt(fanin)
            return torch.Tensor(size).uniform_(-v, v)

    NOTE(review): like the reference, this treats ``shape[0]`` as the fan-in;
    confirm that matches the weight layout of the layers it is applied to.
    """

    def __init__(self):
        super(FaninInit, self).__init__()

    def _init_weight(self, _, arr):
        # The sampling bound shrinks with the size of the first axis.
        bound = 1. / np.sqrt(arr.shape[0])
        arr[:] = np.random.uniform(low=-bound, high=bound, size=arr.shape)

    def _init_bias(self, _, arr):
        arr[:] = 0
@mx.init.register
class OrthoInit(mx.init.Initializer):
    """Orthogonal weight initializer; biases are set to zero.

    A Gaussian random matrix is decomposed with an SVD and the orthonormal
    factor whose shape matches the flattened weight is kept, multiplied by
    ``scale`` and written into the weight as float32.

    Parameters
    ----------
    scale : float
        Multiplier applied to the orthonormal matrix.

    Supported weight ranks:

    * 2-D: the weight itself is the matrix being orthogonalised.
    * 4-D: the matrix has shape ``(shape[0] * shape[2] * shape[3], shape[1])``
      (the original code labels this "assumes NCHW").

    Any other rank raises ``NotImplementedError``.
    """

    def __init__(self, scale=1.0):
        # Forward ``scale`` so the base class records it with the other
        # constructor kwargs, as the built-in initializers do.
        super(OrthoInit, self).__init__(scale=scale)
        self.scale = scale

    def _init_weight(self, _, arr):
        shape = tuple(arr.shape)
        if len(shape) == 2:
            flat_shape = shape
        elif len(shape) == 4:
            # NOTE(review): the flat matrix is reshaped straight back to
            # ``shape`` below without any axis transpose -- confirm this is
            # the intended layout for 4-D weights.
            flat_shape = (np.prod([shape[0], shape[2], shape[3]]), shape[1])
        else:
            # Previously this fell through to an UnboundLocalError on
            # ``flat_shape``; fail with an explicit, descriptive error instead.
            raise NotImplementedError(
                "OrthoInit supports only 2-D or 4-D weights, got shape %r"
                % (shape,)
            )

        gaussian = np.random.normal(0.0, 1.0, flat_shape)
        u, _singular, v = np.linalg.svd(gaussian, full_matrices=False)
        # With full_matrices=False exactly one factor has the flat shape.
        q = u if u.shape == flat_shape else v
        q = q.reshape(shape)
        arr[:] = (self.scale * q).astype(np.float32)

    def _init_bias(self, _, arr):
        arr[:] = 0
def soft_update(dest, src, tau=1.0):
    """Blend the parameters of ``src`` into ``dest`` in place.

    For every parameter of ``src`` the matching parameter of ``dest`` is set
    to ``dest * (1 - tau) + src * tau``. With the default ``tau=1.0`` this is
    a plain copy from ``src`` to ``dest``.

    Parameters
    ----------
    dest : object exposing ``collect_params()``
        Network whose parameters are overwritten.
    src : object exposing ``collect_params()``
        Network whose parameters are read.
    tau : float
        Interpolation weight given to the ``src`` values.

    Raises
    ------
    KeyError
        If ``dest`` has no parameter matching a parameter of ``src``.

    Parameter names are matched by swapping the leading ``src`` prefix for
    the ``dest`` prefix. Only the leading prefix is swapped: a blanket
    ``str.replace`` would also rewrite later occurrences of the prefix inside
    the name, and would corrupt every name when the source prefix is empty.
    """
    # collect_params() is called once per network rather than per parameter.
    dest_params = dest.collect_params()
    src_params = src.collect_params()
    dest_prefix = dest_params._prefix
    src_prefix = src_params._prefix

    for name, src_param in src_params.items():
        if name.startswith(src_prefix):
            dest_key = dest_prefix + name[len(src_prefix):]
        else:
            # Name does not carry the source prefix; look it up unchanged.
            dest_key = name
        dest_param = dest_params[dest_key]
        blended = dest_param.data() * (1.0 - tau) + src_param.data() * tau
        dest_param.set_data(blended)
class OrnsteinUhlenbeckActionNoise:
    """Stateful noise source with a pull towards ``mu`` plus Gaussian kicks.

    The state ``X`` starts at ``mu`` in every component. Each ``sample()``
    call moves it by ``theta * (mu - X) + sigma * N(0, 1)`` and returns the
    updated state.
    """

    def __init__(self, action_dim, mu = 0, theta = 0.15, sigma = 0.2):
        self.action_dim = action_dim
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        # Initial state: every component sits at the mean.
        self.X = np.ones(self.action_dim) * self.mu

    def reset(self):
        """Put the state back at the mean ``mu``."""
        self.X = np.ones(self.action_dim) * self.mu

    def sample(self):
        """Advance the state by one step and return it."""
        pull = self.theta * (self.mu - self.X)
        kick = self.sigma * np.random.randn(len(self.X))
        # Rebind rather than update in place, so arrays returned earlier
        # are never mutated by later calls.
        self.X = self.X + (pull + kick)
        return self.X
def flatten_env_vec(arr):
    '''
    Merge the first two axes of arr into a single leading axis.

    arr shape is [num_env, num_steps, ob_shape]; the result has shape
    [num_env * num_steps, ob_shape].

    The first two axes are swapped before reshaping, so the rows of the
    result are grouped by step (all envs for step 0, then all envs for
    step 1, ...). Without the swap they would be grouped by env instead.
    '''
    dims = arr.shape
    step_major = arr.swapaxes(0, 1)
    merged = dims[0] * dims[1]
    return step_major.reshape(merged, *dims[2:])