Commit fe5f32e: 16 changed files with 1,905 additions and 0 deletions.

.gitignore
*.pyc
*.jpg
*.png

MyConvLSTMCell.py
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F


class MyConvLSTMCell(nn.Module):
    """ConvLSTM cell: each LSTM gate is computed with 2D convolutions over
    the input x and the previous hidden state h."""

    def __init__(self, input_size, hidden_size, kernel_size=3, stride=1, padding=1):
        super(MyConvLSTMCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # Input gate convolutions (input-to-hidden and hidden-to-hidden)
        self.conv_i_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv_i_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
                                   bias=False)

        # Forget gate convolutions
        self.conv_f_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv_f_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
                                   bias=False)

        # Candidate cell state convolutions
        self.conv_c_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv_c_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
                                   bias=False)

        # Output gate convolutions
        self.conv_o_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv_o_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
                                   bias=False)

        # Xavier initialization (PyTorch 0.3.x initializer names)
        torch.nn.init.xavier_normal(self.conv_i_xx.weight)
        torch.nn.init.constant(self.conv_i_xx.bias, 0)
        torch.nn.init.xavier_normal(self.conv_i_hh.weight)

        torch.nn.init.xavier_normal(self.conv_f_xx.weight)
        torch.nn.init.constant(self.conv_f_xx.bias, 0)
        torch.nn.init.xavier_normal(self.conv_f_hh.weight)

        torch.nn.init.xavier_normal(self.conv_c_xx.weight)
        torch.nn.init.constant(self.conv_c_xx.bias, 0)
        torch.nn.init.xavier_normal(self.conv_c_hh.weight)

        torch.nn.init.xavier_normal(self.conv_o_xx.weight)
        torch.nn.init.constant(self.conv_o_xx.bias, 0)
        torch.nn.init.xavier_normal(self.conv_o_hh.weight)

    def forward(self, x, state):
        if state is None:
            # Random initial state; the channel dimension must be hidden_size
            # (not x.size(1)) so the cell also works when input_size != hidden_size.
            state = (Variable(torch.randn(x.size(0), self.hidden_size, x.size(2), x.size(3)).cuda()),
                     Variable(torch.randn(x.size(0), self.hidden_size, x.size(2), x.size(3)).cuda()))
        ht_1, ct_1 = state
        it = F.sigmoid(self.conv_i_xx(x) + self.conv_i_hh(ht_1))     # input gate
        ft = F.sigmoid(self.conv_f_xx(x) + self.conv_f_hh(ht_1))     # forget gate
        ct_tilde = F.tanh(self.conv_c_xx(x) + self.conv_c_hh(ht_1))  # candidate cell state
        ct = (ct_tilde * it) + (ct_1 * ft)                           # updated cell state
        ot = F.sigmoid(self.conv_o_xx(x) + self.conv_o_hh(ht_1))     # output gate
        ht = ot * F.tanh(ct)                                         # new hidden state
        return ht, ct
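
For orientation, a minimal usage sketch of the cell follows. The shapes are illustrative assumptions (the attention model in this repository feeds the cell 512-channel CNN feature maps, matching the default `--memSize 512`); this is not the repository's training code.

```python
# Minimal usage sketch (illustrative shapes; assumes a CUDA device).
cell = MyConvLSTMCell(input_size=512, hidden_size=512).cuda()

feats = Variable(torch.randn(25, 4, 512, 7, 7).cuda())  # (T, batch, C, H, W)
state = None  # passing None makes the cell draw a random initial state
for t in range(feats.size(0)):
    state = cell(feats[t], state)  # forward returns (ht, ct)
ht, ct = state  # each of shape (4, 512, 7, 7)
```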

README.md
# Object-centric Attention for Egocentric Activity Recognition

This repository contains the source code associated with our BMVC 2018 paper:
"Attention is All We Need: Nailing Down Object-centric Attention for Egocentric Activity Recognition".
The paper is available [here](https://arxiv.org/pdf/1807.11794.pdf).

#### Prerequisites

* Python 3.5
* PyTorch 0.3.1

*Training code will be released soon!*

#### **Evaluating the models**

* ##### **RGB**
  * ```
    python eval-run-rgb.py --dataset gtea61 \
        --datasetDir ./dataset/gtea_61/split2/test \
        --modelStateDict best_model_state_rgb.pth \
        --seqLen 25 \
        --memSize 512
    ```
* ##### **Flow**
  * ```
    python eval-run-flow.py --dataset gtea61 \
        --datasetDir ./dataset/gtea_61/split2/test \
        --modelStateDict best_model_state_flow.pth \
        --stackSize 5 \
        --numSegs 5
    ```
* ##### **Two Stream**
  * ```
    python eval-run-twoStream-joint.py --dataset gtea61 \
        --datasetDir ./dataset/gtea_61/split2/test \
        --modelStateDict best_model_state_twoStream.pth \
        --seqLen 25 \
        --stackSize 5 \
        --memSize 512
    ```

#### **Pretrained models**

The models trained on the fixed split (S2) of GTEA 61 can be downloaded from the following links:

* RGB model: [https://drive.google.com/open?id=1B7Xh6hQ9Py8fmL-pjmLzlCent6dnuex5](https://drive.google.com/open?id=1B7Xh6hQ9Py8fmL-pjmLzlCent6dnuex5 "RGB model")
* Flow model: [https://drive.google.com/open?id=1eG-ZF1IwOtYJqpIIeMASURB0uyCM_cFd](https://drive.google.com/open?id=1eG-ZF1IwOtYJqpIIeMASURB0uyCM_cFd "Flow model")
* Two stream model: [https://drive.google.com/open?id=11U5xbrOr8GtEhpkxY2lpPsyFDFJ8savp](https://drive.google.com/open?id=11U5xbrOr8GtEhpkxY2lpPsyFDFJ8savp "Two stream model")

The dataset can be downloaded from the following link:

[http://www.cbi.gatech.edu/fpv/](http://www.cbi.gatech.edu/fpv/)

Once the videos are downloaded, extract the frames and optical flow using the following implementation:

[https://github.com/yjxiong/dense_flow](https://github.com/yjxiong/dense_flow)

Run the 'prepareGTEA61Dataset.py' script to build the dataset in the expected directory layout.

Alternatively, the frames and the corresponding warped optical flow of the GTEA 61 dataset can be downloaded from the following link:

* [https://drive.google.com/file/d/1_y8Y3PnCXsngmZVMqZbg-AfJyIdOeQ2_/view?usp=sharing](https://drive.google.com/file/d/1_y8Y3PnCXsngmZVMqZbg-AfJyIdOeQ2_/view?usp=sharing "GTEA61")

eval-run-flow.py
from __future__ import print_function, division
from flow_resnet import *
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize)
from torch.autograd import Variable
from torch.utils.data.sampler import WeightedRandomSampler
from makeDatasetFlow import *
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import torch
import argparse
import sys


def main_run(dataset, model_state_dict, dataset_dir, stackSize, numSeg):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Unknown dataset: {}'.format(dataset))
        sys.exit(1)

    # ImageNet normalization statistics
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform, sequence=True,
                               numSeg=numSeg, stackSize=stackSize, fmt='.jpg', phase='Test')

    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                                              shuffle=False, num_workers=2, pin_memory=True)

    model = flow_resnet34(False, channels=2 * stackSize, num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))
    # Freeze all parameters; the model is only evaluated
    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []

    for j, (inputs, targets) in enumerate(test_loader):
        inputVariable = Variable(inputs[0].cuda(), volatile=True)
        output_label, _ = model(inputVariable)
        # Average the predictions over the temporal segments
        output_label_mean = torch.mean(output_label.data, 0, True)
        _, predicted = torch.max(output_label_mean, 1)
        numCorr += (predicted == targets[0]).sum()
        # Store plain Python scalars so sklearn can build the confusion matrix
        true_labels.append(targets[0])
        predicted_labels.append(predicted[0])
    test_accuracy = (numCorr / test_samples) * 100
    print('Test Accuracy = {}%'.format(test_accuracy))

    # Row-normalized confusion matrix
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.arange(num_classes)  # one tick per class, not hardcoded to 61
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-flow.jpg', bbox_inches='tight')
    plt.show()


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='gtea61', help='Dataset')
    parser.add_argument('--datasetDir', type=str, default='./dataset/gtea_warped_flow_61/split2/test',
                        help='Dataset directory')
    parser.add_argument('--modelStateDict', type=str,
                        default='./models/gtea61/best_model_state_dict_flow_split2.pth',
                        help='Model path')
    parser.add_argument('--stackSize', type=int, default=5, help='Number of optical flow images in input')
    parser.add_argument('--numSegs', type=int, default=5, help='Number of stacked optical flows')

    args = parser.parse_args()

    main_run(args.dataset, args.modelStateDict, args.datasetDir, args.stackSize, args.numSegs)


if __name__ == '__main__':
    main()
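
As a small follow-on, and purely as an assumption about how one might inspect the result (it is not part of the original script), the diagonal of the row-normalized confusion matrix computed above is the per-class recall:

```python
# Hypothetical helper: per-class recall from the row-normalized confusion matrix.
import numpy as np

def per_class_recall(cnf_matrix_normalized):
    # Row i holds ground-truth class i, so the diagonal entry is the
    # fraction of class-i samples predicted correctly.
    return np.diag(cnf_matrix_normalized)

# e.g. mean class accuracy: per_class_recall(cnf_matrix_normalized).mean()
```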

eval-run-rgb.py
from __future__ import print_function, division
from objectAttentionModelConvLSTM import *
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize)
from makeDatasetRGB import *
from sklearn.metrics import confusion_matrix
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
import torch
import argparse
import sys


def main_run(dataset, model_state_dict, dataset_dir, seqLen, memSize):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Unknown dataset: {}'.format(dataset))
        sys.exit(1)

    # ImageNet normalization statistics
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               seqLen=seqLen, fmt='.jpg')

    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                                              shuffle=False, num_workers=2, pin_memory=True)

    model = attentionModel(num_classes=num_classes, mem_size=memSize)
    model.load_state_dict(torch.load(model_state_dict))

    # Freeze all parameters; the model is only evaluated
    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []
    for j, (inputs, targets) in enumerate(test_loader):
        # Reorder to (seqLen, batch, channels, height, width) as the model expects
        inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda(), volatile=True)
        output_label, _ = model(inputVariable)
        _, predicted = torch.max(output_label.data, 1)
        numCorr += (predicted == targets.cuda()).sum()
        # Store plain Python scalars so sklearn can build the confusion matrix
        true_labels.append(targets[0])
        predicted_labels.append(predicted[0])
    test_accuracy = (numCorr / test_samples) * 100
    print('Test Accuracy = {}%'.format(test_accuracy))

    # Row-normalized confusion matrix
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.arange(num_classes)  # one tick per class, not hardcoded to 61
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-rgb.jpg', bbox_inches='tight')
    plt.show()


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='gtea61', help='Dataset')
    parser.add_argument('--datasetDir', type=str, default='./dataset/gtea_warped_flow_61/split2/test',
                        help='Dataset directory')
    parser.add_argument('--modelStateDict', type=str, default='./models/gtea61/best_model_state_dict_rgb_split2.pth',
                        help='Model path')
    parser.add_argument('--seqLen', type=int, default=25, help='Length of sequence')
    parser.add_argument('--memSize', type=int, default=512, help='ConvLSTM hidden state size')

    args = parser.parse_args()

    main_run(args.dataset, args.modelStateDict, args.datasetDir, args.seqLen, args.memSize)


if __name__ == '__main__':
    main()
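
The entry point can also be invoked directly from Python rather than via the command line; the paths below are placeholders:

```python
# Hypothetical direct call, bypassing argparse (argument names follow
# main_run's signature in this script; paths are placeholders).
main_run(dataset='gtea61',
         model_state_dict='./models/gtea61/best_model_state_dict_rgb_split2.pth',
         dataset_dir='./dataset/gtea_61/split2/test',
         seqLen=25, memSize=512)
```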