# models.py
import torch
from torch.nn import Linear, Module, Parameter, Sequential, Tanh
from torch.distributions import Independent
from torch.distributions.categorical import Categorical
from torch.distributions.normal import Normal

class MultinomialLayer(Module):
    '''
    Implements a layer that outputs a multinomial distribution

    Methods
    -------
    __call__(log_action_probs)
        Takes as input log probabilities and outputs a PyTorch multinomial
        (categorical) distribution
    '''

    def __init__(self):
        Module.__init__(self)

    def __call__(self, log_action_probs):
        return Categorical(logits=log_action_probs)
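
# A minimal usage sketch (an illustrative addition, not part of the original
# file): the layer wraps raw logits in a Categorical distribution, so it can
# sit at the end of a policy network for discrete action spaces.
#
#     layer = MultinomialLayer()
#     dist = layer(torch.randn(4))      # 4 illustrative action logits
#     action = dist.sample()            # scalar index into the 4 actions
#     log_prob = dist.log_prob(action)  # log-probability for policy-gradient losses
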
class DiagGaussianLayer(Module):
    '''
    Implements a layer that outputs a Gaussian distribution with a diagonal
    covariance matrix

    Attributes
    ----------
    log_std : torch.FloatTensor
        the log square root of the diagonal elements of the covariance matrix

    Methods
    -------
    __call__(mean)
        takes as input a mean vector and outputs a Gaussian distribution with
        diagonal covariance matrix defined by log_std
    '''

    def __init__(self, output_dim=None, log_std=None):
        Module.__init__(self)
        self.log_std = log_std

        if log_std is None:
            self.log_std = Parameter(torch.zeros(output_dim), requires_grad=True)

    def __call__(self, mean):
        std = torch.exp(self.log_std)
        normal_dist = Independent(Normal(loc=mean, scale=std), 1)

        return normal_dist
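
# A minimal usage sketch (an illustrative addition, not part of the original
# file): with the default log_std of zeros, the layer produces a unit-variance
# Gaussian centered on the mean vector it receives.
#
#     layer = DiagGaussianLayer(output_dim=2)
#     dist = layer(torch.zeros(2))      # Normal(0, 1) in each dimension
#     action = dist.sample()            # shape: (2,)
#     log_prob = dist.log_prob(action)  # scalar, summed over both dimensions
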
def build_layers(input_dim, hidden_dims, output_dim):
    '''
    Returns a list of Linear and Tanh layers with the specified layer sizes

    Parameters
    ----------
    input_dim : int
        the input dimension of the first linear layer
    hidden_dims : list
        a list of type int specifying the sizes of the hidden layers
    output_dim : int
        the output dimension of the final layer in the list

    Returns
    -------
    layers : list
        a list of Linear layers, each followed by a Tanh activation except for
        the final layer, which has no activation
    '''

    layer_sizes = [input_dim] + hidden_dims + [output_dim]
    layers = []

    for i in range(len(layer_sizes) - 1):
        layers.append(Linear(layer_sizes[i], layer_sizes[i + 1], bias=True))

        if i != len(layer_sizes) - 2:
            layers.append(Tanh())

    return layers
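
# For illustration (not part of the original file), build_layers(3, [64, 64], 2)
# returns
#
#     [Linear(3, 64), Tanh(), Linear(64, 64), Tanh(), Linear(64, 2)]
#
# i.e. the final Linear layer is left without an activation so callers can
# attach their own output head.
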
def build_mlp(input_dim, hidden_dims, output_dim):
    '''
    Build a multilayer perceptron with tanh activations and the specified
    input, hidden, and output layer sizes

    Parameters
    ----------
    input_dim : int
        the input dimension of the first linear layer
    hidden_dims : list
        a list of type int specifying the sizes of the hidden layers
    output_dim : int
        the output dimension of the final layer in the list

    Returns
    -------
    mlp : torch.nn.Sequential
        a pytorch sequential model that defines an MLP
    '''

    mlp = Sequential(*build_layers(input_dim, hidden_dims, output_dim))

    return mlp
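
# A minimal usage sketch (an illustrative addition, not part of the original
# file): an MLP like this can serve as a value function, mapping a batch of
# states to one scalar estimate each. The sizes below are arbitrary.
#
#     value_net = build_mlp(input_dim=3, hidden_dims=[64, 64], output_dim=1)
#     values = value_net(torch.randn(5, 3))   # shape: (5, 1)
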
def build_diag_gauss_policy(state_dim, hidden_dims, action_dim,
                            log_std=None):
    '''
    Build a multilayer perceptron with a DiagGaussianLayer at the output layer

    Parameters
    ----------
    state_dim : int
        the input size of the network
    hidden_dims : list
        a list of type int specifying the sizes of the hidden layers
    action_dim : int
        the dimensionality of the Gaussian distribution output by the policy
    log_std : torch.FloatTensor
        the log square root of the diagonal elements of the covariance matrix
        (will be set to a vector of zeros if none is specified)

    Returns
    -------
    policy : torch.nn.Sequential
        a pytorch sequential model that outputs a Gaussian distribution
    '''

    layers = build_layers(state_dim, hidden_dims, action_dim)

    # Shrink the final layer's weights and zero its bias so the initial
    # policy mean starts out close to zero
    layers[-1].weight.data *= 0.1
    layers[-1].bias.data *= 0.0
    layers.append(DiagGaussianLayer(action_dim, log_std))

    policy = Sequential(*layers)

    return policy
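
# A minimal smoke test (an illustrative addition, not part of the original
# file); the state and action sizes below are assumptions for demonstration,
# not values used elsewhere in the repo.
if __name__ == '__main__':
    policy = build_diag_gauss_policy(state_dim=3, hidden_dims=[64, 64],
                                     action_dim=2)

    states = torch.randn(5, 3)          # batch of 5 three-dimensional states
    dist = policy(states)               # Independent diagonal Gaussian over actions
    actions = dist.sample()             # shape: (5, 2)
    log_probs = dist.log_prob(actions)  # shape: (5,)

    print(actions.shape, log_probs.shape)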