Skip to content

Commit

Permalink
implement the structure
Browse files Browse the repository at this point in the history
  • Loading branch information
Harry-Yang0518 committed Apr 25, 2024
1 parent 572da37 commit 6786ea0
Show file tree
Hide file tree
Showing 14 changed files with 1,064 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
dataset/
dataset/
.output/
57 changes: 57 additions & 0 deletions Trainer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from tqdm import tqdm
import torch.optim as optim
from model import LabelSmoothCrossEntropyLoss
from dataset import get_dataset
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR


def train_and_validate(model, criterion, device, train_loader, val_loader, optimizer, epoch):
    """Run one optimisation pass over the training data, then score the validation data.

    Args:
        model: network to train; put in train mode for the first loop and in
            eval mode for the second.
        criterion: callable ``criterion(output, target)`` whose result supports
            ``.backward()`` and ``.item()``.
        device: device every batch is moved to before the forward pass.
        train_loader: sized iterable of ``(data, target)`` training batches.
        val_loader: sized iterable of ``(data, target)`` validation batches;
            must expose ``.dataset`` because its length is the normaliser.
        optimizer: optimiser that is zeroed and stepped once per training batch.
        epoch: epoch number, only used in the progress-bar captions.

    Returns:
        Tuple ``(val_loss, val_accuracy)``: the sum of the per-batch criterion
        values divided by the number of validation samples, and the top-1
        accuracy in percent.
    """
    model.train()
    train_bar = tqdm(train_loader, total=len(train_loader), desc=f"Training Epoch {epoch}")
    for inputs, labels in train_bar:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

    model.eval()
    loss_sum = 0
    hits = 0
    val_bar = tqdm(val_loader, total=len(val_loader), desc=f"Validating Epoch {epoch}")
    with torch.no_grad():
        for inputs, labels in val_bar:
            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            loss_sum += criterion(logits, labels).item()
            # keepdim=True keeps a trailing axis, so the targets are reshaped to match.
            top1 = logits.argmax(dim=1, keepdim=True)
            hits += top1.eq(labels.view_as(top1)).sum().item()

    n_val = len(val_loader.dataset)
    # NOTE(review): per-batch losses are summed but divided by the sample
    # count; if the criterion already averages within a batch this
    # under-reports the mean loss by roughly the batch size - confirm.
    val_loss = loss_sum / n_val
    val_accuracy = 100. * hits / n_val

    print(f'Validation set: Average loss: {val_loss:.4f}, Accuracy: {hits}/{n_val} ({val_accuracy:.0f}%)')
    return val_loss, val_accuracy

def train_model(data_dir, model, device, lr=0.01, momentum=0.9,
                weight_decay=0.0005, epochs=200, checkpoint_dir="result"):
    """Train ``model`` with SGD and write a checkpoint every tenth epoch.

    Args:
        data_dir: dataset root handed to ``get_dataset``.
        model: network to optimise (already placed on ``device`` by the caller).
        device: device the batches are moved to.
        lr: SGD learning rate.
        momentum: SGD momentum.
        weight_decay: SGD weight decay (L2 penalty).
        epochs: number of epochs to run.
        checkpoint_dir: directory that receives
            ``model_checkpoint_epoch_<n>.pth``; created when missing.

    Returns:
        None. Side effects: ``model`` is trained in place and checkpoint files
        are written.
    """
    import os  # local import: only needed for the checkpoint directory

    # NOTE(review): train_and_validate iterates both objects as loaders that
    # yield (data, target) batches and expose ``.dataset`` - confirm that
    # get_dataset really returns such loaders.
    train_loader, val_loader = get_dataset(data_dir)

    # Hyper-parameters now come from the arguments; the previous body read
    # module globals that do not exist in this file and ignored ``lr``.
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    # Use the loss class this module actually imports.
    # NOTE(review): assumes its constructor accepts ``smoothing=`` - confirm
    # against model.LabelSmoothCrossEntropyLoss.
    criterion = LabelSmoothCrossEntropyLoss(smoothing=0.1)
    scheduler = StepLR(optimizer, step_size=100, gamma=0.25)

    # torch.save does not create missing parent directories.
    os.makedirs(checkpoint_dir, exist_ok=True)

    for epoch in range(1, epochs + 1):
        val_loss, _ = train_and_validate(model, criterion, device, train_loader, val_loader, optimizer, epoch)
        scheduler.step()
        if epoch % 10 == 0:
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                # Store the measured validation loss, not the criterion module.
                'loss': val_loss,
            }
            torch.save(checkpoint, os.path.join(checkpoint_dir, f"model_checkpoint_epoch_{epoch}.pth"))
Binary file added __pycache__/Trainer.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/aug_helper.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/dataset.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/model.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/utils.cpython-310.pyc
Binary file not shown.
218 changes: 218 additions & 0 deletions aug_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
import torch
import random
import torch.nn.functional as F
import numpy as np
import torch.distributed as dist
import copy
# Small constant added to a denominator so it can never be exactly zero
# (used by average_precision).
epsilon = 1e-8


class AugBasic:
    """Hold a sampling rate together with its three-resolution FFT settings.

    After construction ``fft_params`` maps ``'win_len'``, ``'hop_len'`` and
    ``'n_fft'`` to three-element lists, one entry per resolution. Only the
    sampling rates 22050, 16000 and 8000 are supported.

    Raises:
        ValueError: if ``fs`` is not one of the supported sampling rates.
    """

    def __init__(self, fs):
        super().__init__()
        self.fs = fs
        self.fft_params = {}
        # (sampling rate, window lengths, hop lengths); n_fft always equals
        # the window lengths.
        # NOTE(review): the last hop at 22050 Hz is 1024, which breaks the
        # win_len / 4 pattern of every other entry - confirm it is intended.
        presets = (
            (22050, (512, 1024, 2048), (128, 256, 1024)),
            (16000, (256, 512, 1024), (64, 128, 256)),
            (8000, (128, 256, 512), (32, 64, 128)),
        )
        for rate, windows, hops in presets:
            if fs == rate:
                # Fresh lists per instance so callers may mutate them freely.
                self.fft_params['win_len'] = list(windows)
                self.fft_params['hop_len'] = list(hops)
                self.fft_params['n_fft'] = list(windows)
                return
        raise ValueError


def count_parameters(model):
    """Return the total number of elements in the parameters of ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


def make_weights_for_balanced_classes(samples, nclasses):
    """Return one weight per sample, inversely proportional to its class frequency.

    Args:
        samples: sequence of items whose element ``[1]`` is an integer class
            index in ``range(nclasses)``.
        nclasses: number of classes.

    Returns:
        List of floats, aligned with ``samples``: ``N / count[class]`` where
        ``N`` is the number of samples. Empty for empty input.

    Raises:
        IndexError: if a class index is outside ``range(nclasses)``.
    """
    counts = [0] * nclasses
    for item in samples:
        counts[item[1]] += 1
    total = float(sum(counts))
    # A class without samples gets weight 0 instead of dividing by zero; the
    # value is never looked up because no sample carries that label.
    weight_per_class = [total / c if c else 0.0 for c in counts]
    return [weight_per_class[item[1]] for item in samples]


def measure_inference_time(model, input, repetitions=300, use_16b=False):
    """Time forward passes of a copy of ``model`` on the CUDA device.

    The model is deep-copied, so the caller's instance is neither moved nor
    converted. Ten untimed warm-up passes run first, then ``repetitions``
    passes are timed individually with CUDA events.

    Args:
        model: module to benchmark.
        input: tensor fed to the model on every pass.
        repetitions: number of timed forward passes.
        use_16b: when true, both the input and the model copy are converted
            to half precision.

    Returns:
        Tuple ``(mean, std)`` of the per-pass times, in the unit reported by
        ``torch.cuda.Event.elapsed_time`` (milliseconds).
    """
    device = torch.device("cuda")
    model_ = copy.deepcopy(model)
    model_.eval()
    starter = torch.cuda.Event(enable_timing=True)
    ender = torch.cuda.Event(enable_timing=True)
    timings = np.zeros((repetitions, 1))
    print(input.shape)
    if use_16b:
        input = input.half()
        model_.half()
    input = input.to(device)
    model_.to(device)
    with torch.no_grad():
        # GPU warm-up: previously ran with autograd enabled, which stored
        # activations for a backward pass that never happens and exercised a
        # different code path from the timed runs.
        for _ in range(10):
            model_(input)
        for rep in range(repetitions):
            starter.record()
            model_(input)
            ender.record()
            # Kernels launch asynchronously; wait until both events completed
            # before reading the elapsed time.
            torch.cuda.synchronize()
            timings[rep] = starter.elapsed_time(ender)
    mean_syn = np.sum(timings) / repetitions
    std_syn = np.std(timings)
    return mean_syn, std_syn

def collate_fn(batch):
    """Stack the first element of every sample and collect the second as a plain list.

    Args:
        batch: sequence of samples where ``sample[0]`` is a tensor (all of the
            same shape) and ``sample[1]`` is its label.

    Returns:
        Tuple ``(x, y)``: ``x`` is the contiguous stack of the tensors along a
        new leading dimension, ``y`` is the list of labels, left un-stacked.
    """
    tensors, labels = [], []
    for sample in batch:
        tensors.append(sample[0])
        labels.append(sample[1])
    stacked = torch.stack(tensors, dim=0).contiguous()
    return (stacked, labels)

def files_to_list(filename):
    """
    Read a UTF-8 text file and return its lines as a list, with trailing
    whitespace (including the newline) removed from every line.
    """
    with open(filename, encoding="utf-8") as handle:
        return [line.rstrip() for line in handle]


def find_first_nnz(t, q, dim=1):
    """Return, along ``dim``, the index ``torch.max`` reports for the mask ``t == q``.

    For a slice that contains ``q`` this is the position of a matching
    element (the first one, per the documented tie-breaking of ``torch.max``).
    NOTE(review): a slice without any match still yields an index - callers
    cannot tell "no match" from a match at that position.
    """
    matches = t == q
    return matches.max(dim=dim).indices


def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy, in percent, for every k in ``topk``.

    Args:
        output: score tensor; ``topk`` is taken along dimension 1.
        target: tensor of class indices, one per row of ``output``.
        topk: iterable of k values to evaluate.

    Returns:
        List of 0-dim float tensors, one per entry of ``topk``, each the
        percentage of rows whose target is among the k highest scores.
    """
    maxk = max(topk)
    batch_size = target.size(0)
    with torch.no_grad():
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        # reshape, not view: ``correct`` inherits the transposed memory layout
        # of ``pred``, so slicing more than one row is not viewable as 1-D.
        return [correct[:k].reshape(-1).float().sum(0) * 100. / batch_size for k in topk]


def average_precision(output, target):
    """Return the average precision of one class from scores and binary targets.

    Args:
        output: 1-D NumPy array of scores.
        target: 1-D NumPy array aligned with ``output``; entries equal to 1
            are the positives.

    Returns:
        Sum of precision@i over the ranks that hold a positive, divided by
        the number of positives (plus ``epsilon`` so an all-negative target
        yields 0 rather than a division by zero).
    """
    # Rank the examples by descending score.
    order = output.argsort()[::-1]
    ranks = np.arange(1, len(output) + 1, dtype=float)
    is_positive = target[order] == 1
    # Positives seen up to and including each rank.
    hits_so_far = np.cumsum(is_positive)
    n_positive = hits_so_far[-1]
    # Only ranks that hold a positive contribute to the sum.
    hits_so_far[np.logical_not(is_positive)] = 0
    precision_sum = np.sum(hits_so_far / ranks)
    return precision_sum / (n_positive + epsilon)


def mAP(targs, preds):
    """Return the mean average precision over all classes, in percent.

    Args:
        targs: 2-D array of targets; column ``k`` belongs to class ``k``.
        preds: 2-D array of scores with the same layout.

    Return:
        ``100 *`` the mean of the per-class average precisions, or ``0`` when
        ``preds`` is empty.
    """
    if np.size(preds) == 0:
        return 0
    # One average-precision value per class column.
    per_class = np.array([
        average_precision(preds[:, k], targs[:, k])
        for k in range(preds.shape[1])
    ])
    return 100 * per_class.mean()

def pad_sample_seq(x, n_samples):
    """Force the last dimension of ``x`` to length ``n_samples``.

    A longer input is cropped to a window of ``n_samples`` starting at a
    uniformly random offset; a shorter input is zero-padded on the right.

    Args:
        x: tensor whose last dimension is the sample axis.
        n_samples: desired length of the last dimension.

    Returns:
        Tensor with the same leading dimensions as ``x`` and a last dimension
        of ``n_samples``. The padded result is detached from autograd; the
        cropped result is a view of ``x``.
    """
    length = x.size(-1)
    if length >= n_samples:
        start = random.randint(0, length - n_samples)
        # Crop along the LAST axis - the same axis the length check uses. The
        # previous ``x[start:...]`` sliced axis 0, which is only equivalent
        # for 1-D input.
        return x[..., start:start + n_samples]
    return F.pad(x, (0, n_samples - length), "constant").detach()


def pad_sample_seq_batch(x, n_samples):
    """Force dimension 1 of a batch to length ``n_samples``.

    A longer batch is cropped to one window of ``n_samples`` at a random
    offset shared by every row; a shorter batch is zero-padded on the right.

    Args:
        x: batched tensor; assumed to be ``(batch, samples)``.
            NOTE(review): ``F.pad`` pads the last dimension, so the padding
            branch targets dimension 1 only for 2-D input - confirm callers.
        n_samples: desired length of dimension 1.

    Returns:
        Tensor with dimension 1 of length ``n_samples``. The padded result is
        detached from autograd; the cropped result is a view of ``x``.
    """
    # Decide on the sample axis (dim 1). The previous check used ``x.size(0)``,
    # the batch size, so whether the batch was cropped or padded depended on
    # how many rows it had.
    length = x.size(1)
    if length >= n_samples:
        start = random.randint(0, length - n_samples)
        return x[:, start:start + n_samples]
    return F.pad(x, (0, n_samples - length), "constant").detach()


def add_weight_decay(model, weight_decay=1e-5, skip_list=()):
    """Split the trainable parameters of ``model`` into two optimiser groups.

    One-dimensional parameters and parameters whose name is in ``skip_list``
    are exempt from weight decay; every other trainable parameter receives
    ``weight_decay``. Parameters that do not require gradients are left out.

    Returns:
        Two parameter-group dicts, in this order: the exempt group
        (``weight_decay`` 0) and the decayed group.
    """
    exempt_params = []
    decayed_params = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            is_exempt = len(param.shape) == 1 or name in skip_list
            bucket = exempt_params if is_exempt else decayed_params
            bucket.append(param)
    exempt_group = {'params': exempt_params, 'weight_decay': 0.}
    decayed_group = {'params': decayed_params, 'weight_decay': weight_decay}
    return [exempt_group, decayed_group]


def _get_bn_param_ids(net):
bn_ids = []
for m in net.modules():
print(m)
if isinstance(m, torch.nn.BatchNorm1d) or isinstance(m, torch.nn.LayerNorm):
bn_ids.append(id(m.weight))
bn_ids.append(id(m.bias))
elif isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.Linear):
if m.bias is not None:
bn_ids.append(id(m.bias))
return bn_ids


def reduce_tensor(tensor, n):
    """Sum ``tensor`` across the default process group and divide the result by ``n``.

    The input is cloned first, so the caller's tensor is left untouched.
    Requires an initialised ``torch.distributed`` process group.
    """
    reduced = tensor.clone()
    dist.all_reduce(reduced, op=dist.ReduceOp.SUM)
    reduced /= n
    return reduced


def gather_tensor(tensor, n):
    """Gather ``tensor`` from every rank of the default process group.

    Args:
        tensor: tensor contributed by this rank; it must have the same shape
            and dtype on all ranks.
        n: number of ranks (world size).

    Returns:
        List of ``n`` tensors, entry ``i`` holding the tensor of rank ``i``.
    """
    rt = tensor.clone()
    # all_gather needs one receive buffer per rank with the contributed
    # tensor's shape, dtype and device. The previous ``torch.zeros(n,
    # dtype=torch.cuda.float())`` referenced a non-existent attribute and
    # sized the buffers by the world size instead.
    tensor_list = [torch.zeros_like(rt) for _ in range(n)]
    dist.all_gather(tensor_list, rt)
    return tensor_list


def parse_gpu_ids(gpu_ids):  # list of ints
    """Render ``gpu_ids`` as a comma-separated string, e.g. ``[0, 1]`` -> ``"0,1"``."""
    joined = ','.join(str(gpu_id) for gpu_id in gpu_ids)
    return joined.rstrip(',')
52 changes: 52 additions & 0 deletions dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from utils import AudioAugs
import os
import librosa
import pandas as pd
import numpy as np
import torch
import torchaudio

def load_audio_files_with_torchaudio(path, file_paths, augmentor):
    """Load audio files and reduce each one to a time-averaged MFCC vector.

    Every file is loaded with torchaudio, mixed down to mono by averaging
    its channels, optionally augmented, converted to 13 MFCCs and averaged
    over the last (frame) axis.

    Args:
        path: directory that contains the files.
        file_paths: file names relative to ``path``.
        augmentor: callable that takes the mono waveform as a 1-D NumPy array
            and returns a 2-tuple whose first element is the augmented
            waveform, or ``None`` to skip augmentation.

    Returns:
        List with one NumPy feature vector per file, in ``file_paths`` order.
    """
    features = []
    # One MFCC transform per sample rate instead of a new one per file.
    mfcc_by_rate = {}
    for file_path in file_paths:
        full_path = os.path.join(path, file_path)
        waveform, sample_rate = torchaudio.load(full_path)
        waveform = waveform.mean(dim=0, keepdim=True)  # Ensure mono by averaging channels
        if augmentor is not None:
            augmented, _ = augmentor(waveform.squeeze(0).numpy())
            waveform = torch.tensor(augmented, dtype=torch.float32).unsqueeze(0)
        mfcc = mfcc_by_rate.get(sample_rate)
        if mfcc is None:
            mfcc = torchaudio.transforms.MFCC(sample_rate=sample_rate, n_mfcc=13)
            mfcc_by_rate[sample_rate] = mfcc
        mfccs = mfcc(waveform)
        features.append(mfccs.mean(dim=2).squeeze(0).numpy())
    return features


def get_dataset(data_dir, apply_augmentation=True):
    """
    Load the dataset and turn it into MFCC feature tables for classification.

    Expects ``data_dir`` to contain ``train_mp3s/``, ``test_mp3s/`` and
    ``train_label.txt`` (a header-less CSV read as the columns ``file`` and
    ``label``).

    Args:
        data_dir: dataset root directory.
        apply_augmentation: when true the training clips go through
            ``AudioAugs``; test clips are never augmented.

    Returns:
        Tuple ``(train_df, test_df)`` of DataFrames with one row per audio
        file; ``train_df`` carries an additional ``label`` column.
    """
    train_audio_path = os.path.join(data_dir, 'train_mp3s')
    test_audio_path = os.path.join(data_dir, 'test_mp3s')
    label_file = os.path.join(data_dir, 'train_label.txt')

    labels = pd.read_csv(label_file, header=None, names=['file', 'label'])

    # os.listdir returns entries in arbitrary order; sort so the row order is
    # reproducible from run to run.
    train_files = sorted(os.listdir(train_audio_path))
    test_files = sorted(os.listdir(test_audio_path))

    def passthrough(samples):
        """Stand-in augmentor that returns the waveform unchanged."""
        return samples, None

    train_augmentor = (
        AudioAugs(k_augs=['flip', 'tshift', 'mulaw'], fs=22050)
        if apply_augmentation else passthrough
    )

    # The loader defined in this module is load_audio_files_with_torchaudio;
    # the names called here before were never defined.
    train_features = load_audio_files_with_torchaudio(train_audio_path, train_files, train_augmentor)
    test_features = load_audio_files_with_torchaudio(test_audio_path, test_files, passthrough)

    train_df = pd.DataFrame(train_features)

    # Attach labels by file name rather than by position, since directory
    # order is unrelated to label-file order.
    # NOTE(review): assumes the ``file`` column holds the audio file name,
    # with or without its extension - confirm against train_label.txt.
    label_by_file = dict(zip(labels['file'].astype(str), labels['label']))
    matched = []
    for name in train_files:
        stem = os.path.splitext(name)[0]
        if name in label_by_file:
            matched.append(label_by_file[name])
        elif stem in label_by_file:
            matched.append(label_by_file[stem])
        else:
            matched = None
            break
    if matched is not None:
        train_df['label'] = matched
    else:
        # Fall back to the original positional alignment when names do not match.
        train_df['label'] = labels['label'].values[:len(train_features)]

    test_df = pd.DataFrame(test_features)

    return train_df, test_df

# # Example usage
# data_dir = '/scratch/hy2611/ML_Competition/dataset'
# train_data, test_data = get_dataset(data_dir)
55 changes: 55 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from model import SoundNetRaw
from Trainer import train_model
from dataset import get_dataset
import torch
from tqdm import tqdm
import pandas as pd


# Run configuration for this script.
NUM_CLASSES = 4  # number of target categories
EPOCHS = 200  # number of training epochs
BATCH_SIZE = 32  # mini-batch size
learning_rate = 0.01  # SGD learning rate
momentum = 0.9  # SGD momentum
weight_decay = 0.0005  # SGD weight decay
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def predict(model, device, test_loader):
    """Return the predicted class index for every sample served by ``test_loader``.

    Args:
        model: trained network; switched to eval mode.
        device: device the inputs are moved to.
        test_loader: sized iterable of batches whose element ``[0]`` is the
            input tensor.

    Returns:
        Flat list with one predicted class index per sample, in loader order.
    """
    model.eval()
    collected = []
    progress = tqdm(test_loader, total=len(test_loader), desc="Predicting")
    with torch.no_grad():
        for batch in progress:
            inputs = batch[0].to(device)
            scores = model(inputs)
            # Index of the highest score along dimension 1.
            best = torch.max(scores, 1)[1]
            collected.extend(best.cpu().numpy())
    return collected

def save_predictions_to_csv(predictions, file_name):
    """Write ``predictions`` to ``file_name`` as a CSV with the columns ``id`` and ``category``.

    ``id`` is the zero-based position of each prediction; the DataFrame
    index itself is not written.
    """
    columns = {
        'id': range(len(predictions)),
        'category': predictions,
    }
    table = pd.DataFrame(columns)
    table.to_csv(file_name, index=False)


if __name__ == '__main__':
    # Build the network, train it, save it, then write test-set predictions.
    model = SoundNetRaw(
        nf=32,  # Number of filters in the initial convolution layer
        # 66150 samples (3 s at 22050 Hz) divided by the product of the
        # downsampling factors (4 * 4 * 4 * 4 = 256).
        clip_length=66150 // 256,
        embed_dim=128,  # Embedding dimension
        n_layers=4,  # Number of layers
        nhead=8,  # Number of attention heads
        factors=[4, 4, 4, 4],  # Downsampling factors for each layer
        n_classes=NUM_CLASSES,  # Single source of truth instead of a second literal 4
        dim_feedforward=512,  # Dimensionality of the feedforward network within the transformer layers
    )
    model.to(device)
    data_dir = '/scratch/hy2611/ML_Competition/dataset'
    # Hand over the hyper-parameters configured at the top of this script;
    # before, they were defined but never passed on.
    train_model(data_dir, model, device, lr=learning_rate, momentum=momentum)

    # Saves the whole pickled module, not just its state_dict.
    torch.save(model, "Limbo.pth")

    # NOTE(review): predict() iterates its argument as a loader of batches
    # and calls len() on it - confirm that the second value returned by
    # get_dataset satisfies that.
    _, test_loader = get_dataset(data_dir)
    predictions = predict(model, device, test_loader)
    save_predictions_to_csv(predictions, 'predictions.csv')
Loading

0 comments on commit 6786ea0

Please sign in to comment.