Commit

initial commit
swathikirans committed Aug 1, 2018
0 parents commit fe5f32e
Showing 16 changed files with 1,905 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
*.pyc
*.jpg
*.png
58 changes: 58 additions & 0 deletions MyConvLSTMCell.py
@@ -0,0 +1,58 @@
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

class MyConvLSTMCell(nn.Module):

def __init__(self, input_size, hidden_size, kernel_size=3, stride=1, padding=1):
super(MyConvLSTMCell, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.conv_i_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
self.conv_i_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
bias=False)

self.conv_f_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
self.conv_f_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
bias=False)

self.conv_c_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
self.conv_c_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
bias=False)

self.conv_o_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
self.conv_o_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
bias=False)

torch.nn.init.xavier_normal(self.conv_i_xx.weight)
torch.nn.init.constant(self.conv_i_xx.bias, 0)
torch.nn.init.xavier_normal(self.conv_i_hh.weight)

torch.nn.init.xavier_normal(self.conv_f_xx.weight)
torch.nn.init.constant(self.conv_f_xx.bias, 0)
torch.nn.init.xavier_normal(self.conv_f_hh.weight)

torch.nn.init.xavier_normal(self.conv_c_xx.weight)
torch.nn.init.constant(self.conv_c_xx.bias, 0)
torch.nn.init.xavier_normal(self.conv_c_hh.weight)

torch.nn.init.xavier_normal(self.conv_o_xx.weight)
torch.nn.init.constant(self.conv_o_xx.bias, 0)
torch.nn.init.xavier_normal(self.conv_o_hh.weight)

    def forward(self, x, state):
        if state is None:
            # Initialize hidden and cell states with hidden_size channels and the
            # same spatial size as the input (the convolutions preserve it).
            state = (Variable(torch.randn(x.size(0), self.hidden_size, x.size(2), x.size(3)).cuda()),
                     Variable(torch.randn(x.size(0), self.hidden_size, x.size(2), x.size(3)).cuda()))
        ht_1, ct_1 = state
it = F.sigmoid(self.conv_i_xx(x) + self.conv_i_hh(ht_1))
ft = F.sigmoid(self.conv_f_xx(x) + self.conv_f_hh(ht_1))
ct_tilde = F.tanh(self.conv_c_xx(x) + self.conv_c_hh(ht_1))
ct = (ct_tilde * it) + (ct_1 * ft)
ot = F.sigmoid(self.conv_o_xx(x) + self.conv_o_hh(ht_1))
ht = ot * F.tanh(ct)
return ht, ct
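
A minimal usage sketch for the cell, assuming a CUDA device (the cell builds its initial state with `.cuda()`) and illustrative shapes: 512-channel 7x7 feature maps, matching the ResNet-34 conv5 output and the `memSize 512` setting used in the README below.

```
import torch
from torch.autograd import Variable
from MyConvLSTMCell import MyConvLSTMCell

# Illustrative shapes: 25 time steps, batch of 1, 512-channel 7x7 feature maps.
cell = MyConvLSTMCell(input_size=512, hidden_size=512).cuda()
feats = Variable(torch.randn(25, 1, 512, 7, 7).cuda())  # (seqLen, batch, C, H, W)

state = None
for t in range(feats.size(0)):
    state = cell(feats[t], state)  # each step returns (ht, ct)
ht, ct = state                     # ht is the final hidden feature map
```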
66 changes: 66 additions & 0 deletions README.md
@@ -0,0 +1,66 @@
# Object-centric Attention for Egocentric Activity Recognition

This repository contains the source code associated with our BMVC 2018 paper
"Attention is All We Need: Nailing Down Object-centric Attention for Egocentric Activity Recognition".
The paper is available [here](https://arxiv.org/pdf/1807.11794.pdf).

#### Prerequisites

* Python 3.5
* PyTorch 0.3.1

*Training code will be released soon!*

#### **Evaluating the models**

* ##### **RGB**
* ```
python eval-run-rgb.py --dataset gtea61
--datasetDir ./dataset/gtea_61/split2/test
--modelStateDict best_model_state_rgb.pth
--seqLen 25
--memSize 512
```
* ##### **Flow**
* ```
python eval-run-flow.py --dataset gtea61
--datasetDir ./dataset/gtea_61/split2/test
--modelStateDict best_model_state_flow.pth
--stackSize 5
--numSegs 5
```
* ##### **Two Stream**
* ```
python eval-run-twoStream-joint.py --dataset gtea61
--datasetDir ./dataset/gtea_61/split2/test
--modelStateDict best_model_state_twoStream.pth
--seqLen 25
--stackSize 5
--memSize 512
```

#### **Pretrained models**

The models trained on the fixed split (S2) of GTEA 61 can be downloaded from the following links:

* RGB model [https://drive.google.com/open?id=1B7Xh6hQ9Py8fmL-pjmLzlCent6dnuex5](https://drive.google.com/open?id=1B7Xh6hQ9Py8fmL-pjmLzlCent6dnuex5 "RGB model")
* Flow model [https://drive.google.com/open?id=1eG-ZF1IwOtYJqpIIeMASURB0uyCM\_cFd](https://drive.google.com/open?id=1eG-ZF1IwOtYJqpIIeMASURB0uyCM_cFd "Flow model")
* Two stream model [https://drive.google.com/open?id=11U5xbrOr8GtEhpkxY2lpPsyFDFJ8savp](https://drive.google.com/open?id=11U5xbrOr8GtEhpkxY2lpPsyFDFJ8savp "Two stream model")

The dataset can be downloaded from the following link:

[http://www.cbi.gatech.edu/fpv/](http://www.cbi.gatech.edu/fpv/)

Once the videos are downloaded, extract the frames and optical flow using the following implementation:

[https://github.com/yjxiong/dense\_flow](https://github.com/yjxiong/dense_flow)

Run the `prepareGTEA61Dataset.py` script to prepare the dataset.

Alternatively, the frames and the corresponding warped optical flow of the GTEA 61 dataset can be downloaded from the following link:

* [https://drive.google.com/file/d/1\_y8Y3PnCXsngmZVMqZbg-AfJyIdOeQ2\_/view?usp=sharing](https://drive.google.com/file/d/1_y8Y3PnCXsngmZVMqZbg-AfJyIdOeQ2_/view?usp=sharing "GTEA61")



95 changes: 95 additions & 0 deletions eval-run-flow.py
@@ -0,0 +1,95 @@
from __future__ import print_function, division
from flow_resnet import *
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize)
from torch.autograd import Variable
from torch.utils.data.sampler import WeightedRandomSampler
from makeDatasetFlow import *
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import argparse
import sys
import numpy as np
import torch

def main_run(dataset, model_state_dict, dataset_dir, stackSize, numSeg):

if dataset == 'gtea61':
num_classes = 61
elif dataset == 'gtea71':
num_classes = 71
elif dataset == 'gtea_gaze':
num_classes = 44
elif dataset == 'egtea':
num_classes = 106

mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]

normalize = Normalize(mean=mean, std=std)

spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform, sequence=True,
numSeg=numSeg, stackSize=stackSize, fmt='.jpg', phase='Test')

test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
shuffle=False, num_workers=2, pin_memory=True)

    # Two input channels (flow_x, flow_y) per stacked optical-flow frame
    model = flow_resnet34(False, channels=2*stackSize, num_classes=num_classes)
model.load_state_dict(torch.load(model_state_dict))
for params in model.parameters():
params.requires_grad = False

model.train(False)
model.cuda()
test_samples = vid_seq_test.__len__()
print('Number of samples = {}'.format(test_samples))
print('Evaluating...')
numCorr = 0
true_labels = []
predicted_labels = []

for j, (inputs, targets) in enumerate(test_loader):
inputVariable = Variable(inputs[0].cuda(), volatile=True)
output_label, _ = model(inputVariable)
output_label_mean = torch.mean(output_label.data, 0, True)
_, predicted = torch.max(output_label_mean, 1)
numCorr += (predicted == targets[0]).sum()
true_labels.append(targets)
predicted_labels.append(predicted)
test_accuracy = (numCorr / test_samples) * 100
print('Test Accuracy = {}%'.format(test_accuracy))

    # Build the confusion matrix and row-normalize it so each true-class row sums to 1
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

ticks = np.linspace(0, 60, num=61)
plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
plt.colorbar()
plt.xticks(ticks, fontsize=6)
plt.yticks(ticks, fontsize=6)
plt.grid(True)
plt.clim(0, 1)
plt.savefig(dataset + '-flow.jpg', bbox_inches='tight')
plt.show()

def __main__():
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='gtea61', help='Dataset')
parser.add_argument('--datasetDir', type=str, default='./dataset/gtea_warped_flow_61/split2/test',
help='Dataset directory')
parser.add_argument('--modelStateDict', type=str,
default='./models/gtea61/best_model_state_dict_flow_split2.pth',
help='Model path')
parser.add_argument('--stackSize', type=int, default=5, help='Number of optical flow images in input')
    parser.add_argument('--numSegs', type=int, default=5, help='Number of flow stacks (segments) sampled per video')

args = parser.parse_args()

dataset = args.dataset
model_state_dict = args.modelStateDict
dataset_dir = args.datasetDir
stackSize = args.stackSize
numSegs = args.numSegs

main_run(dataset, model_state_dict, dataset_dir, stackSize, numSegs)

__main__()
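
For reference, the script can be invoked as below; the dataset and model paths simply mirror the argparse defaults above and should be adjusted to local paths.

```
python eval-run-flow.py --dataset gtea61 \
    --datasetDir ./dataset/gtea_warped_flow_61/split2/test \
    --modelStateDict ./models/gtea61/best_model_state_dict_flow_split2.pth \
    --stackSize 5 \
    --numSegs 5
```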
91 changes: 91 additions & 0 deletions eval-run-rgb.py
@@ -0,0 +1,91 @@
from __future__ import print_function, division
from objectAttentionModelConvLSTM import *
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize)
from makeDatasetRGB import *
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import argparse
import sys
import numpy as np
import torch

def main_run(dataset, model_state_dict, dataset_dir, seqLen, memSize):

if dataset == 'gtea61':
num_classes = 61
elif dataset == 'gtea71':
num_classes = 71
elif dataset == 'gtea_gaze':
num_classes = 44
elif dataset == 'egtea':
num_classes = 106

mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]

normalize = Normalize(mean=mean, std=std)
spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

vid_seq_test = makeDataset(dataset_dir,
spatial_transform=spatial_transform,
seqLen=seqLen, fmt='.jpg')

test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
shuffle=False, num_workers=2, pin_memory=True)

model = attentionModel(num_classes=num_classes, mem_size=memSize)
model.load_state_dict(torch.load(model_state_dict))

for params in model.parameters():
params.requires_grad = False

model.train(False)
model.cuda()
test_samples = vid_seq_test.__len__()
print('Number of samples = {}'.format(test_samples))
print('Evaluating...')
numCorr = 0
true_labels = []
predicted_labels = []
for j, (inputs, targets) in enumerate(test_loader):
inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda(), volatile=True)
output_label, _ = model(inputVariable)
_, predicted = torch.max(output_label.data, 1)
numCorr += (predicted == targets.cuda()).sum()
true_labels.append(targets)
predicted_labels.append(predicted)
test_accuracy = (numCorr / test_samples) * 100
print('Test Accuracy = {}%'.format(test_accuracy))

    # Confusion matrix, row-normalized so each true-class row sums to 1
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

ticks = np.linspace(0, 60, num=61)
plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
plt.colorbar()
plt.xticks(ticks, fontsize=6)
plt.yticks(ticks, fontsize=6)
plt.grid(True)
plt.clim(0, 1)
plt.savefig(dataset + '-rgb.jpg', bbox_inches='tight')
plt.show()

def __main__():
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='gtea61', help='Dataset')
parser.add_argument('--datasetDir', type=str, default='./dataset/gtea_warped_flow_61/split2/test',
help='Dataset directory')
parser.add_argument('--modelStateDict', type=str, default='./models/gtea61/best_model_state_dict_rgb_split2.pth',
help='Model path')
parser.add_argument('--seqLen', type=int, default=25, help='Length of sequence')
parser.add_argument('--memSize', type=int, default=512, help='ConvLSTM hidden state size')

args = parser.parse_args()

dataset = args.dataset
model_state_dict = args.modelStateDict
dataset_dir = args.datasetDir
seqLen = args.seqLen
memSize = args.memSize

main_run(dataset, model_state_dict, dataset_dir, seqLen, memSize)

__main__()