# models.py
import numpy as np
import tensorflow as tf
import keras.backend as K
from keras.models import Sequential
from keras.losses import categorical_crossentropy
from utils import categorical_cross_entropy as CCE
from keras_self_attention import SeqSelfAttention as Attention
from keras.layers import (
    Input,
    LSTM,
    Dense,
    Conv1D,
    Lambda,
    Dropout,
    MaxPool1D,
    GlobalMaxPool1D,
    Embedding,
    Bidirectional,
    TimeDistributed
)

one_hot = lambda y, len_tag: K.one_hot(K.cast(y, tf.int32), len_tag)
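# Example (illustrative note, not in the original source): with len_ner_tag == 5,
# one_hot(ner_y, 5) maps an integer tag tensor of shape (batch, len_sent) to a
# one-hot tensor of shape (batch, len_sent, 5); K.cast is needed because the
# Input tensors are float.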


class AMT4NERaNEN(object):
    '''
    Attention-Based Multi-Task Learning for NER and NEN
    '''
    def __init__(self, len_char, len_charcab, len_sent, len_vocab, len_emd,
                 len_ner_tag, len_nen_tag, params):
        # Word Embedding Size
        self.len_word_emd = len_emd
        # Char Embedding Size
        self.len_char_emd = 30
        # Maximum Char Length
        self.len_char = len_char
        # Maximum Sentence Length
        self.len_sent = len_sent
        # Vocabulary Size
        self.len_vocab = len_vocab
        # Char Vocabulary Size
        self.len_charcab = len_charcab
        # NER Tag Size
        self.len_ner_tag = len_ner_tag
        # NEN Tag Size
        self.len_nen_tag = len_nen_tag
        # Task Type
        self.task = params.task
        # Hyper-parameters
        self.lr = params.lr
        self.beta1 = params.beta1
        self.beta2 = params.beta2
        self.rnn_units = params.rnn_units
        self.encoder_type = params.encoder_type
        # Pretrained Word Embeddings config (overrides len_emd)
        self.len_word_emd = params.embeddings_size
        self.embeddings = params.embeddings
        if self.encoder_type == "lstm":
            # Prepend a zero vector for the padding index (mask_zero=True).
            self.embeddings = np.vstack((np.array([0.] * self.len_word_emd),
                                         self.embeddings))

    def __init_layers(self, keep_prob=0.):
        # NOTE: despite its name, keep_prob is passed to Keras as the dropout
        # rate, i.e. the fraction of units to drop.
        # Embedding Layers
        self.char_emd = Embedding(input_dim=self.len_charcab,
                                  output_dim=self.len_char_emd,
                                  mask_zero=True,
                                  trainable=True)
        self.word_emd = Embedding(input_dim=self.len_vocab,
                                  output_dim=self.len_word_emd,
                                  weights=[self.embeddings],
                                  mask_zero=True if self.encoder_type == "lstm" else False,
                                  trainable=False)
        # LSTM Encoder Layer
        self._lstm_encoder = Sequential([
            Bidirectional(LSTM(units=self.rnn_units,
                               dropout=keep_prob,
                               return_sequences=True))
        ])
        # CNN Layers
        self.conv1 = Conv1D(self.rnn_units, 3, padding='same')
        self.conv2 = Conv1D(self.rnn_units, 3, padding='same')
        # NER Decoder Layer
        self._ner_decoder = Sequential([
            Bidirectional(LSTM(units=self.rnn_units,
                               dropout=keep_prob,
                               return_sequences=True)),
            TimeDistributed(Dense(self.len_ner_tag, activation=K.softmax))
        ])
        # NEN Decoder Layer
        self._nen_decoder = Sequential([
            Bidirectional(LSTM(units=self.rnn_units,
                               dropout=keep_prob,
                               return_sequences=True)),
            TimeDistributed(Dense(self.len_nen_tag, activation=K.softmax))
        ])
        # Task Discriminator Layer
        self.discrim_task = Sequential([
            Bidirectional(LSTM(units=self.rnn_units,
                               dropout=keep_prob,
                               return_sequences=True)),
            TimeDistributed(Dense(2, activation=K.softmax))
        ])
        # Diversity Discriminator Layer
        self.discrim_diversity = Sequential([
            Bidirectional(LSTM(units=self.rnn_units,
                               dropout=keep_prob,
                               return_sequences=True)),
            TimeDistributed(Dense(2, activation=K.softmax))
        ])

    def ner_decoder(self, x):
        x = Attention(attention_activation='tanh')(x)
        return x, self._ner_decoder(x)

    def nen_decoder(self, x):
        x = Attention(attention_activation='tanh')(x)
        return x, self._nen_decoder(x)

    def _cnn_encoder(self, inputs):
        '''
        1D Convolutional Neural Network encoder
        '''
        conv1_output = self.conv1(inputs)
        conv2_output = self.conv2(conv1_output)
        outputs = K.concatenate([inputs, conv2_output], axis=-1)
        return outputs

    def _char_encoder(self, inputs, keep_prob=0.):
        '''
        Char Encoder Layer
        '''
        char_encoder = Sequential([
            Bidirectional(LSTM(units=self.rnn_units,
                               dropout=keep_prob,
                               return_sequences=False))
        ], name='char_encoder')
        outputs = char_encoder(inputs)
        return outputs
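
    # Shape sketch for the char path (illustrative note, assuming batch size B;
    # the reshapes themselves happen in __call__):
    #   char ids         (B, len_sent, len_char)
    #   char embeddings  (B * len_sent, len_char, len_char_emd)   after the first reshape
    #   char features    (B * len_sent, 2 * rnn_units)            BiLSTM final state
    #   word-level view  (B, len_sent, 2 * rnn_units)             after the second reshape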

    @property
    def encoder(self):
        return getattr(self, f"_{self.encoder_type}_encoder")

    def __collect_vars(self):
        # Bucket trainable variables by the name-scope substrings used in __call__.
        name_scopes = ['embedding', 'encoder', 'decoder', 'discrim_task', 'discrim_diversity']
        t_vars = tf.trainable_variables()
        cache = {}
        for name in name_scopes:
            tmp = [var for var in t_vars if name in var.name]
            cache[name] = tmp
        return cache

    def __call__(self):
        # Dropout rate
        keep_prob = 0.
        # Initialise all layer parameters
        self.__init_layers(keep_prob)
        # Char sequence placeholder (labeled sentences)
        c = Input(shape=(self.len_sent, self.len_char,))
        # Char sequence placeholder (unlabeled sentences)
        u_c = Input(shape=(self.len_sent, self.len_char,))
        # Word sequence placeholder (labeled sentences)
        x = Input(shape=(self.len_sent,))
        # Word sequence placeholder (unlabeled sentences)
        u_x = Input(shape=(self.len_sent,))
        # NER Task Tags
        ner_y = Input(shape=(self.len_sent,))
        ner_oh_y = one_hot(ner_y, self.len_ner_tag)
        # NEN Task Tags
        nen_y = Input(shape=(self.len_sent,))
        nen_oh_y = one_hot(nen_y, self.len_nen_tag)

        with tf.name_scope('embedding'):
            # handle labeled data
            word_emd = self.word_emd(x)
            char_emd = K.reshape(self.char_emd(c), [-1, self.len_char, self.len_char_emd])
            # handle unlabeled data
            u_word_emd = self.word_emd(u_x)
            u_char_emd = K.reshape(self.char_emd(u_c), [-1, self.len_char, self.len_char_emd])

        with tf.name_scope('encoder'):
            # handle labeled data
            char_x = K.reshape(self._char_encoder(char_emd), [-1, self.len_sent, self.rnn_units * 2])
            word_char_x = K.concatenate([word_emd, char_x], axis=-1)
            en_word_char_x = self.encoder(word_char_x)
            # handle unlabeled data
            u_char_x = K.reshape(self._char_encoder(u_char_emd), [-1, self.len_sent, self.rnn_units * 2])
            u_word_char_x = K.concatenate([u_word_emd, u_char_x], axis=-1)
            u_en_word_char_x = self.encoder(u_word_char_x)

        with tf.name_scope('decoder'):
            ner_att, ner_prob = self.ner_decoder(en_word_char_x)
            nen_att, nen_prob = self.nen_decoder(en_word_char_x)

        with tf.name_scope('discrim_task'):
            dis_ner = self.discrim_task(ner_att)
            dis_nen = self.discrim_task(nen_att)

        with tf.name_scope('discrim_diversity'):
            dis_label = self.discrim_diversity(en_word_char_x)
            dis_unlabel = self.discrim_diversity(u_en_word_char_x)

        # Diversity Discriminator Loss: the discriminator learns to tell labeled
        # from unlabeled sentences, while the encoder is trained on flipped
        # labels so that both look alike.
        discrim_decoder_loss = CCE(dis_label, tf.ones_like(dis_label)[:, :, 0]) + \
                               CCE(dis_unlabel, tf.zeros_like(dis_unlabel)[:, :, 0])
        discrim_encoder_loss = CCE(dis_label, tf.ones_like(dis_label)[:, :, 0]) + \
                               CCE(dis_unlabel, tf.ones_like(dis_unlabel)[:, :, 0])
        # Task Discriminator Loss: the private loss trains the discriminator to
        # separate NER from NEN attention features; the shared loss is its
        # negation, pushing the encoder towards task-invariant features.
        discrim_private_loss = CCE(dis_ner, tf.ones_like(dis_ner)[:, :, 0]) + \
                               CCE(dis_nen, tf.zeros_like(dis_nen)[:, :, 0])
        discrim_share_loss = -discrim_private_loss
        # NER Task Loss
        ner_loss = tf.reduce_mean(categorical_crossentropy(ner_oh_y, ner_prob))
        # NEN Task Loss
        nen_loss = tf.reduce_mean(categorical_crossentropy(nen_oh_y, nen_prob))
        # Trainable Parameters
        t_vars = self.__collect_vars()
        task_loss = {
            'all': nen_loss + ner_loss,
            'nen': nen_loss,
            'ner': ner_loss
        }
        assert self.task in task_loss, "task type is not supported by this model!"
        # Diversity Discriminator Solvers
        discrim_decoder_solver = tf.compat.v1.train.AdamOptimizer(1e-4) \
            .minimize(discrim_decoder_loss, var_list=t_vars['discrim_diversity'])
        discrim_encoder_solver = tf.compat.v1.train.AdamOptimizer(1e-4) \
            .minimize(discrim_encoder_loss, var_list=t_vars['encoder'])
        # Task Solver
        loss = task_loss[self.task]
        solver = tf.compat.v1.train.AdamOptimizer(self.lr, beta1=self.beta1, beta2=self.beta2) \
            .minimize(loss)
        # Task Discriminator Solvers
        discrim_private_solver = tf.compat.v1.train.AdamOptimizer(1e-4, beta1=0.5, beta2=0.5) \
            .minimize(discrim_private_loss, var_list=t_vars['decoder'] + t_vars['discrim_task'])
        discrim_share_solver = tf.compat.v1.train.AdamOptimizer(1e-4, beta1=0.5, beta2=0.5) \
            .minimize(discrim_share_loss, var_list=t_vars['encoder'])
        return {
            'c': c,
            'x': x,
            'u_c': u_c,
            'u_x': u_x,
            'ner_y': ner_y,
            'nen_y': nen_y,
            'ner_prob': ner_prob,
            'nen_prob': nen_prob,
            'loss': loss,
            'solver': solver,
            'discrim_private_loss': discrim_private_loss,
            'discrim_private_solver': discrim_private_solver,
            'discrim_share_loss': discrim_share_loss,
            'discrim_share_solver': discrim_share_solver,
            'dis_label': dis_label,
            'discrim_encoder_loss': discrim_encoder_loss,
            'discrim_encoder_solver': discrim_encoder_solver,
            'discrim_decoder_loss': discrim_decoder_loss,
            'discrim_decoder_solver': discrim_decoder_solver
        }
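

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original module).
# It assumes a `params` object exposing the attributes read in __init__ and
# feeds random data through the TF1-style graph/session workflow the class
# relies on; a real run would supply corpus data and a params object built
# from its own configuration.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from types import SimpleNamespace

    len_sent, len_char, len_vocab, len_charcab, len_emd = 50, 20, 1000, 80, 100
    # With encoder_type == 'lstm', __init__ prepends a zero row for the padding
    # index, so the pretrained table supplied here has len_vocab - 1 rows.
    params = SimpleNamespace(task='all', lr=1e-3, beta1=0.9, beta2=0.999,
                             rnn_units=64, encoder_type='lstm',
                             embeddings_size=len_emd,
                             embeddings=np.random.randn(len_vocab - 1, len_emd))
    model = AMT4NERaNEN(len_char, len_charcab, len_sent, len_vocab, len_emd,
                        len_ner_tag=5, len_nen_tag=10, params=params)
    ops = model()

    batch = 2
    feed = {
        ops['c']: np.random.randint(1, len_charcab, (batch, len_sent, len_char)),
        ops['u_c']: np.random.randint(1, len_charcab, (batch, len_sent, len_char)),
        ops['x']: np.random.randint(1, len_vocab, (batch, len_sent)),
        ops['u_x']: np.random.randint(1, len_vocab, (batch, len_sent)),
        ops['ner_y']: np.random.randint(0, 5, (batch, len_sent)),
        ops['nen_y']: np.random.randint(0, 10, (batch, len_sent)),
    }
    sess = K.get_session()
    # Global initialisation is enough for a smoke test of one optimisation
    # step, although it also overwrites the pretrained embedding weights.
    sess.run(tf.compat.v1.global_variables_initializer())
    _, step_loss = sess.run([ops['solver'], ops['loss']], feed_dict=feed)
    print('one training step, loss =', step_loss)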