-
Notifications
You must be signed in to change notification settings - Fork 12
/
train_emotion.py
135 lines (123 loc) · 7.36 KB
/
train_emotion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import numpy as np, torch, torch.nn as nn, torch.optim as optim, torch.nn.functional as F
import argparse, time, pickle, os
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score, classification_report, precision_recall_fscore_support
from model import CategoricalModel, MaskedNLLLoss, BiModalAttention
from dataloader import MOSEICategorical
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(393)
# NOTE(review): this only constructs a torch.cuda.device object and discards
# it; it is never entered with `with`, so it presumably does not select a
# GPU -- confirm the intent (e.g. torch.cuda.set_device) with the author.
torch.cuda.device([0])
def get_train_valid_sampler(trainset, valid=0.1):
    """Split the indices of ``trainset`` into training and validation samplers.

    The leading ``int(valid * len(trainset))`` indices are assigned to the
    validation sampler and the remaining ones to the training sampler. The
    index list is not shuffled before it is cut.

    Args:
        trainset: Any object supporting ``len()``.
        valid: Fraction of the indices reserved for validation.

    Returns:
        A ``(train_sampler, valid_sampler)`` tuple of ``SubsetRandomSampler``.
    """
    indices = list(range(len(trainset)))
    cut = int(valid * len(indices))
    train_part, valid_part = indices[cut:], indices[:cut]
    return SubsetRandomSampler(train_part), SubsetRandomSampler(valid_part)
def get_MOSEI_loaders(path, batch_size=128, valid=0.1, num_workers=0, pin_memory=False):
    """Create the train, validation and test data loaders for one MOSEI file.

    The training and validation loaders both read from the same
    ``MOSEICategorical(path=path)`` dataset through the two samplers returned
    by ``get_train_valid_sampler``; the test loader reads from
    ``MOSEICategorical(path=path, train=False)`` without a sampler. Every
    loader uses its dataset's own ``collate_fn``.

    Args:
        path: Passed through to ``MOSEICategorical``.
        batch_size: Batch size used by all three loaders.
        valid: Fraction of the training indices given to the validation sampler.
        num_workers: Forwarded to every ``DataLoader``.
        pin_memory: Forwarded to every ``DataLoader``.

    Returns:
        A ``(train_loader, valid_loader, test_loader)`` tuple.
    """
    # Settings shared by all three loaders.
    shared = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory)

    train_data = MOSEICategorical(path=path)
    sampler_for_train, sampler_for_valid = get_train_valid_sampler(train_data, valid)
    train_loader = DataLoader(
        train_data,
        sampler=sampler_for_train,
        collate_fn=train_data.collate_fn,
        **shared,
    )
    valid_loader = DataLoader(
        train_data,
        sampler=sampler_for_valid,
        collate_fn=train_data.collate_fn,
        **shared,
    )

    test_data = MOSEICategorical(path=path, train=False)
    test_loader = DataLoader(test_data, collate_fn=test_data.collate_fn, **shared)

    return train_loader, valid_loader, test_loader
def train_or_eval_model(model, loss_function, dataloader, epoch, optimizer=None, train=False):
    """Run one pass over ``dataloader``, either training or evaluating ``model``.

    Args:
        model: Module called as ``model(textf, acouf, visuf, textf, qmask, umask)``
            and expected to return ``(log_prob, alpha_f, alpha_b)``.
        loss_function: Callable invoked as ``loss_function(log_probs, labels, umask)``
            that returns a scalar tensor.
        dataloader: Iterable of batches. The first six entries of a batch are
            unpacked as ``textf, visuf, acouf, qmask, umask, label``; the last
            entry is collected into the returned ``vids`` list.
        epoch: Accepted for interface compatibility; not used by this function.
        optimizer: Optimizer stepped after every batch. Required when
            ``train`` is true.
        train: When true the model is put in training mode and updated;
            otherwise it is put in evaluation mode and only scored.

    Returns:
        A 7-tuple ``(avg_loss, avg_accuracy, labels, preds, masks, avg_fscore,
        [alphas_f, alphas_b, vids])``. ``avg_loss`` is the mask-weighted mean
        loss rounded to 4 places; ``avg_accuracy`` and ``avg_fscore`` are
        mask-weighted percentages rounded to 2 places; ``labels``, ``preds``
        and ``masks`` are flat NumPy arrays over all batches. The three lists
        in the last element are only filled in evaluation mode. If the
        dataloader yields no batches the result is
        ``(nan, nan, [], [], [], nan, [])``.

    Raises:
        ValueError: If ``train`` is true but no ``optimizer`` was supplied.
    """
    # Explicit check instead of `assert`, which is removed under `python -O`.
    if train and optimizer is None:
        raise ValueError("an optimizer is required when train=True")

    if train:
        model.train()
    else:
        model.eval()

    # Decide whether batches must be moved to the GPU by looking at where the
    # model lives, instead of relying on a module-level `cuda` flag that only
    # exists when this file is executed as a script.
    first_param = next(iter(model.parameters()), None)
    use_cuda = first_param is not None and first_param.is_cuda

    losses, preds, labels, masks = [], [], [], []
    alphas_f, alphas_b, vids = [], [], []

    # Autograd is only needed while training; disabling it for evaluation
    # avoids keeping a graph alive for every accumulated attention tensor.
    with torch.set_grad_enabled(train):
        for data in dataloader:
            if train:
                optimizer.zero_grad()

            features = [d.cuda() for d in data[:-1]] if use_cuda else data[:-1]
            textf, visuf, acouf, qmask, umask, label = features

            # The text features are passed twice, matching the original call.
            log_prob, alpha_f, alpha_b = model(textf, acouf, visuf, textf, qmask, umask)

            # Swap the first two axes and flatten to (-1, n_classes) so the
            # rows line up with the flattened labels and mask.
            lp_ = log_prob.transpose(0, 1).contiguous().view(-1, log_prob.size()[2])
            labels_ = label.view(-1)
            loss = loss_function(lp_, labels_, umask)

            preds.append(torch.argmax(lp_, 1).detach().cpu().numpy())
            labels.append(labels_.detach().cpu().numpy())
            masks.append(umask.view(-1).cpu().numpy())
            # Weight the batch loss by its mask total so the final average
            # is taken over all unmasked positions.
            losses.append(loss.item() * masks[-1].sum())

            if train:
                loss.backward()
                optimizer.step()
            else:
                alphas_f += alpha_f
                alphas_b += alpha_b
                vids += data[-1]

    if not preds:
        # The dataloader produced no batches.
        return float('nan'), float('nan'), [], [], [], float('nan'), []

    preds = np.concatenate(preds)
    labels = np.concatenate(labels)
    masks = np.concatenate(masks)

    avg_loss = round(np.sum(losses) / np.sum(masks), 4)
    avg_accuracy = round(accuracy_score(labels, preds, sample_weight=masks) * 100, 2)
    avg_fscore = round(f1_score(labels, preds, sample_weight=masks, average='weighted') * 100, 2)
    return avg_loss, avg_accuracy, labels, preds, masks, avg_fscore, [alphas_f, alphas_b, vids]
if __name__ == '__main__':
    # Class weights handed to MaskedNLLLoss for each supported emotion.
    emotion_loss_weights = {
        'anger': [1/0.2128, 1/0.7872],
        'happiness': [1/0.5381, 1/0.4619],
        'sadness': [1/0.2546, 1/0.7454],
        'disgust': [1/0.1699, 1/0.8301],
        'fear': [1/0.0816, 1/0.9184],
        'surprise': [1/0.0986, 1/0.9014],
    }

    parser = argparse.ArgumentParser("Trains a categorical model for the emotion label given by --emotion")
    parser.add_argument('--no-cuda', action='store_true', default=False, help='does not use GPU')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR', help='learning rate')
    parser.add_argument('--l2', type=float, default=0.00001, metavar='L2', help='L2 regularization weight')
    parser.add_argument('--rec-dropout', type=float, default=0.1, metavar='rec_dropout', help='rec_dropout rate')
    parser.add_argument('--dropout', type=float, default=0.5, metavar='dropout', help='dropout rate')
    parser.add_argument('--batch-size', type=int, default=128, metavar='BS', help='batch size')
    parser.add_argument('--epochs', type=int, default=50, metavar='E', help='number of epochs')
    # `--class-weight` is on by default, so on its own it could never be
    # switched off; `--no-class-weight` writes to the same destination.
    parser.add_argument('--class-weight', action='store_true', default=True, help='class weight')
    parser.add_argument('--no-class-weight', dest='class_weight', action='store_false', help='disable class weight')
    # Restricting the choices rejects unknown emotions up front instead of
    # failing later when no loss weights are defined for them.
    parser.add_argument('--emotion', type=str, default='happiness', choices=sorted(emotion_loss_weights),
                        metavar='emotion', help='emotion to train')
    parser.add_argument('--log_dir', type=str, default='logs/mosei_emotion', help='Directory for tensorboard logs')
    args = parser.parse_args()

    os.makedirs(args.log_dir, exist_ok=True)
    writer = SummaryWriter(args.log_dir)
    print(args)

    # Run on either GPU or CPU
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    if args.cuda:
        print('Running on GPU')
    else:
        print('Running on CPU')
    print("Tensorboard logs in " + args.log_dir)

    batch_size = args.batch_size
    n_classes = 2
    # `cuda` is deliberately kept as a module-level name: other code in this
    # file may read it as a global.
    cuda = args.cuda
    n_epochs = args.epochs
    emotion = str(args.emotion)

    D_m_text, D_m_audio, D_m_video, D_m_context = 300, 384, 35, 300
    # D_a is not used anywhere in this script.
    D_g, D_p, D_e, D_h, D_a = 150, 150, 100, 100, 100

    # Instantiate model
    model = CategoricalModel(D_m_text, D_m_audio, D_m_video, D_m_context, D_g, D_p, D_e, D_h,
                             n_classes=n_classes, dropout_rec=args.rec_dropout, dropout=args.dropout)
    if cuda:
        model.cuda()

    if args.class_weight:
        loss_weights = torch.FloatTensor(emotion_loss_weights[emotion])
        loss_function = MaskedNLLLoss(loss_weights.cuda() if cuda else loss_weights)
    else:
        loss_function = MaskedNLLLoss()

    # Get optimizers and relevant dataloaders
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2)
    # valid=0.0 leaves the validation sampler empty; valid_loader is not used below.
    train_loader, valid_loader, test_loader = get_MOSEI_loaders(
        './data/' + str(emotion) + '.pkl', valid=0.0, batch_size=batch_size, num_workers=0)

    best_loss, best_label, best_pred, best_mask, best_attn = None, None, None, None, None

    # Training loop
    try:
        for e in tqdm(range(n_epochs), desc=f'MOSEI Emotion ({emotion})'):
            train_loss, train_acc, _, _, _, train_fscore, _ = train_or_eval_model(
                model, loss_function, train_loader, e, optimizer, True)
            test_loss, test_acc, test_label, test_pred, test_mask, test_fscore, attentions = train_or_eval_model(
                model, loss_function, test_loader, e)

            # The emotion name is interpolated into the tag; the previous
            # string literal logged the text "' + str(emotion) + '" verbatim.
            writer.add_scalar(f"Train Loss - MOSEI Emotion ({emotion})", train_loss, e)
            writer.add_scalar(f"Test Loss - MOSEI Emotion ({emotion})", test_loss, e)

            # NOTE(review): the "best" epoch is chosen by test loss, since no
            # validation split is requested above -- confirm this is intended.
            if best_loss is None or best_loss > test_loss:
                best_loss, best_label, best_pred, best_mask, best_attn = (
                    test_loss, test_label, test_pred, test_mask, attentions)
    finally:
        # Close the writer so pending TensorBoard events are written out even
        # if training is interrupted.
        writer.close()

    if best_loss is None:
        # No epoch ran (e.g. --epochs 0), so there are no predictions to score.
        print('No epochs were run; nothing to report.')
    else:
        print('Test performance..')
        print('Loss {} accuracy {}'.format(
            best_loss, round(accuracy_score(best_label, best_pred, sample_weight=best_mask)*100, 2)))
        print(classification_report(best_label, best_pred, sample_weight=best_mask, digits=4))
        print(confusion_matrix(best_label, best_pred, sample_weight=best_mask))