# layers.py
import numpy as np
import tensorflow as tf


class LayerWithConstantParameters(tf.keras.layers.Layer):
    """Layer with constant (non-trainable) parameters added at initialization."""

    def add_parameter(self, value, name, dtype=None):
        if dtype is None:
            dtype = self.dtype
        # Wrap Python scalars in an array so the parameter has a well-defined shape.
        if isinstance(value, (int, float)):
            value = np.array([value])
        return self.add_weight(shape=value.shape,
                               initializer=tf.keras.initializers.Constant(value),
                               trainable=False,
                               name="param/" + name,
                               dtype=dtype)
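

# Illustrative sketch (not part of the original file): a minimal subclass showing how
# `add_parameter` is intended to be used. The class name and the offset value below
# are made up for this example.
def _example_constant_parameter_layer():
    class _ShiftLayer(LayerWithConstantParameters):
        def __init__(self, offset):
            super(_ShiftLayer, self).__init__()
            # Stored as a non-trainable weight of shape (1,).
            self.offset = self.add_parameter(name='offset', value=offset)

        def call(self, inputs):
            return inputs + self.offset

    layer = _ShiftLayer(offset=1.0)
    return layer(tf.zeros((2, 3)))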


class ObservationPreprocessingLayer(LayerWithConstantParameters):
    """Normalizes observations as in
    https://github.com/HumanCompatibleAI/multiagent-competition/blob/72c342c4178cf189ea336a743f74e445faa6183a/gym_compete/policy.py#L77

    Args:
        obs_mean: mean of the observations (subtracted from the input).
        obs_std: standard deviation of the observations (the input is divided by it).
        clip_value: observations are clipped to [-clip_value, clip_value] after normalization.
    """

    def __init__(self, obs_mean, obs_std, clip_value):
        super(ObservationPreprocessingLayer, self).__init__()
        self.mean = self.add_parameter(name='mean', value=obs_mean)
        self.std = self.add_parameter(name='std', value=obs_std)
        self.clip = self.add_parameter(name='clip', value=clip_value)

    def call(self, inputs):
        inputs = tf.cast(inputs, tf.float32)
        return tf.clip_by_value((inputs - self.mean) / self.std, -self.clip, self.clip)
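

# Illustrative usage sketch (not from the original file). The statistics below are
# arbitrary placeholders for whatever running mean/std a policy checkpoint provides.
def _example_observation_preprocessing():
    obs_mean = np.zeros(4, dtype=np.float32)
    obs_std = np.ones(4, dtype=np.float32)
    preprocess = ObservationPreprocessingLayer(obs_mean, obs_std, clip_value=5.0)
    obs = tf.constant([[0.5, -1.0, 10.0, -10.0]])
    # Observations are normalized and then clipped to [-5, 5].
    return preprocess(obs)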


class ValuePostprocessingLayer(LayerWithConstantParameters):
    """Rescales normalized value predictions (multiply by std, add mean) as in
    https://github.com/HumanCompatibleAI/multiagent-competition/blob/72c342c4178cf189ea336a743f74e445faa6183a/gym_compete/policy.py#L128

    Args:
        value_mean: mean value (added to the scaled network output).
        value_std: standard deviation (the network output is multiplied by it).
    """

    def __init__(self, value_mean, value_std):
        super(ValuePostprocessingLayer, self).__init__()
        self.mean = self.add_parameter(name='mean', value=value_mean)
        self.std = self.add_parameter(name='std', value=value_std)

    def call(self, inputs):
        return inputs * self.std + self.mean
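

# Illustrative usage sketch (not from the original file). The mean/std values are
# arbitrary and stand in for the return-normalization statistics of a checkpoint.
def _example_value_postprocessing():
    postprocess = ValuePostprocessingLayer(value_mean=2.0, value_std=3.0)
    normalized_values = tf.constant([[0.0], [1.0]])
    # 0.0 -> 2.0 and 1.0 -> 5.0: the output is scaled by std and shifted by mean.
    return postprocess(normalized_values)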


class DiagonalNormalSamplingLayer(tf.keras.layers.Layer):
    """Samples from a diagonal Normal distribution.

    Inputs have shape (batch, dim, 2), where the last axis holds the mean and the
    log standard deviation of each component.
    """

    def call(self, inputs):
        assert len(inputs.shape) == 3, "Expected a 3d tensor, got shape %s" % inputs.shape
        assert inputs.shape[2] == 2, "Expected mean/logstd pairs, got shape %s" % inputs.shape
        means = inputs[:, :, 0]
        logstds = inputs[:, :, 1]
        # One draw per (batch, dim) entry, with standard deviation exp(logstd).
        return tf.random.normal(shape=tf.shape(inputs)[:2],
                                mean=means, stddev=tf.exp(logstds))
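

# Illustrative usage sketch (not from the original file): stacking means and log
# standard deviations along a trailing axis of size 2, as the layer expects.
def _example_diagonal_normal_sampling():
    means = tf.zeros((8, 3))
    logstds = tf.fill((8, 3), -1.0)
    params = tf.stack([means, logstds], axis=-1)  # shape (8, 3, 2)
    sampler = DiagonalNormalSamplingLayer()
    return sampler(params)  # shape (8, 3), one sample per component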


class UnconnectedVariableLayer(tf.keras.layers.Layer):
    """Layer which outputs a trainable variable on every call.

    The variable does not depend on the layer's input; the input is only used to
    determine the batch size of the output.
    """

    def __init__(self, shape, name, initializer=None):
        super(UnconnectedVariableLayer, self).__init__(name=name)
        if initializer is None:
            initializer = tf.keras.initializers.Zeros()
        self.v = self.add_weight(
            shape=(1,) + shape, initializer=initializer,
            trainable=True, name="var/" + name)

    def call(self, inputs):
        # Tile the (1, *shape) variable along the batch dimension of `inputs`.
        return tf.repeat(self.v, axis=0, repeats=tf.shape(inputs)[0])
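

# Illustrative sketch (not from the original file): one way these layers can be
# combined, e.g. a state-independent log-std broadcast to the batch and stacked
# with a mean head before sampling. The shapes and names here are assumptions.
if __name__ == "__main__":
    features = tf.zeros((4, 6))                      # placeholder "features"
    mean_head = tf.keras.layers.Dense(3)(features)   # per-observation means
    logstd = UnconnectedVariableLayer(shape=(3,), name="logstd")(features)
    params = tf.stack([mean_head, logstd], axis=-1)  # shape (4, 3, 2)
    samples = DiagonalNormalSamplingLayer()(params)
    print("sampled output shape:", samples.shape)    # (4, 3)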