Commit 6786ea0 (1 parent: 572da37)
Showing 14 changed files with 1,064 additions and 1 deletion.
@@ -1 +1,2 @@
 dataset/
+.output/
@@ -0,0 +1,57 @@
from tqdm import tqdm
import os
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from model import LabelSmoothCrossEntropyLoss
from dataset import get_dataset


def train_and_validate(model, criterion, device, train_loader, val_loader, optimizer, epoch):
    model.train()

    for batch_idx, (data, target) in tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Training Epoch {epoch}"):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    model.eval()
    val_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(val_loader, total=len(val_loader), desc=f"Validating Epoch {epoch}"):
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    val_loss /= len(val_loader.dataset)
    val_accuracy = 100. * correct / len(val_loader.dataset)

    print(f'Validation set: Average loss: {val_loss:.4f}, Accuracy: {correct}/{len(val_loader.dataset)} ({val_accuracy:.0f}%)')
    return val_loss, val_accuracy


def train_model(data_dir, model, device, num_classes=4, epochs=200, lr=0.01, momentum=0.9, weight_decay=0.0005):
    # num_classes, epochs, lr, and weight_decay were previously read from
    # undefined globals (NUM_CLASSES, EPOCHS, learning_rate, weight_decay);
    # they are explicit parameters now
    train_loader, val_loader = get_dataset(data_dir)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    # the original instantiated an undefined LabelSmoothingLoss; use the imported class
    criterion = LabelSmoothCrossEntropyLoss(classes=num_classes, smoothing=0.1)
    scheduler = StepLR(optimizer, step_size=100, gamma=0.25)

    os.makedirs("result", exist_ok=True)  # checkpoint directory must exist before torch.save
    for epoch in range(1, epochs + 1):
        train_and_validate(model, criterion, device, train_loader, val_loader, optimizer, epoch)
        scheduler.step()
        if epoch % 10 == 0:
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': criterion
            }
            torch.save(checkpoint, f"result/model_checkpoint_epoch_{epoch}.pth")
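
The criterion imported above, model.LabelSmoothCrossEntropyLoss, is defined in a file not shown in this diff. For orientation only, a minimal label-smoothing cross-entropy matching the classes=.../smoothing=... call made in train_model might look like the sketch below; the actual implementation in model.py may differ.

import torch
import torch.nn as nn
import torch.nn.functional as F

class LabelSmoothCrossEntropyLoss(nn.Module):
    # hedged sketch: smooths the one-hot targets, then takes cross-entropy
    def __init__(self, classes, smoothing=0.1):
        super().__init__()
        self.classes = classes
        self.smoothing = smoothing

    def forward(self, logits, target):
        log_probs = F.log_softmax(logits, dim=-1)
        with torch.no_grad():
            # 1 - smoothing on the true class, the remainder spread uniformly
            true_dist = torch.full_like(log_probs, self.smoothing / (self.classes - 1))
            true_dist.scatter_(1, target.unsqueeze(1), 1.0 - self.smoothing)
        return torch.mean(torch.sum(-true_dist * log_probs, dim=-1))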
5 binary files not shown.
@@ -0,0 +1,218 @@
import torch
import random
import torch.nn.functional as F
import numpy as np
import torch.distributed as dist
import copy

epsilon = 1e-8


class AugBasic:
    def __init__(self, fs):
        super().__init__()
        self.fs = fs
        self.fft_params = {}
        if fs == 22050:
            self.fft_params['win_len'] = [512, 1024, 2048]
            self.fft_params['hop_len'] = [128, 256, 1024]
            self.fft_params['n_fft'] = [512, 1024, 2048]
        elif fs == 16000:
            self.fft_params['win_len'] = [256, 512, 1024]
            self.fft_params['hop_len'] = [256 // 4, 512 // 4, 1024 // 4]
            self.fft_params['n_fft'] = [256, 512, 1024]
        elif fs == 8000:
            self.fft_params['win_len'] = [128, 256, 512]
            self.fft_params['hop_len'] = [32, 64, 128]
            self.fft_params['n_fft'] = [128, 256, 512]
        else:
            raise ValueError(f"unsupported sampling rate: {fs}")


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def make_weights_for_balanced_classes(samples, nclasses):
    count = [0] * nclasses
    for item in samples:
        count[item[1]] += 1
    weight_per_class = [0.] * nclasses
    N = float(sum(count))
    for i in range(nclasses):
        weight_per_class[i] = N / float(count[i])
    weight = [0] * len(samples)
    for idx, val in enumerate(samples):
        weight[idx] = weight_per_class[val[1]]
    return weight
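

def _demo_balanced_sampler(dataset, nclasses, batch_size=32):
    # Editor's hedged usage sketch, not part of the original commit:
    # make_weights_for_balanced_classes pairs with WeightedRandomSampler to
    # oversample rare classes. `dataset.samples` is assumed to be a list of
    # (path, class_index) tuples, as in torchvision-style datasets.
    from torch.utils.data import DataLoader, WeightedRandomSampler
    weights = make_weights_for_balanced_classes(dataset.samples, nclasses)
    sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
    return DataLoader(dataset, batch_size=batch_size, sampler=sampler)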


def measure_inference_time(model, input, repetitions=300, use_16b=False):
    device = torch.device("cuda")
    model_ = copy.deepcopy(model)
    model_.eval()
    starter = torch.cuda.Event(enable_timing=True)
    ender = torch.cuda.Event(enable_timing=True)
    timings = np.zeros((repetitions, 1))
    if use_16b:
        input = input.half()
        model_.half()
    input = input.to(device)
    model_.to(device)
    with torch.no_grad():
        # GPU warm-up so the first timed iterations are not inflated
        for _ in range(10):
            _ = model_(input)
        for rep in range(repetitions):
            starter.record()
            _ = model_(input)
            ender.record()
            # wait for GPU sync before reading the elapsed time
            torch.cuda.synchronize()
            curr_time = starter.elapsed_time(ender)
            timings[rep] = curr_time
    mean_syn = np.sum(timings) / repetitions
    std_syn = np.std(timings)
    return mean_syn, std_syn


def collate_fn(batch):
    # note: labels are returned as a plain Python list, not a tensor
    x = [item[0] for item in batch]
    y = [item[1] for item in batch]
    x = torch.stack(x, dim=0).contiguous()
    return (x, y)


def files_to_list(filename):
    """
    Takes a text file of filenames and makes a list of filenames
    """
    with open(filename, encoding="utf-8") as f:
        files = f.readlines()
    files = [f.rstrip() for f in files]
    return files


def find_first_nnz(t, q, dim=1):
    # index of the first element equal to q along `dim` (argmax of a bool tensor)
    _, mask_max_indices = torch.max(t == q, dim=dim)
    return mask_max_indices


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)
    with torch.no_grad():
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        # reshape (not view): slices of the transposed tensor are non-contiguous
        return [correct[:k].reshape(-1).float().sum(0) * 100. / batch_size for k in topk]


def average_precision(output, target):
    # sort examples by descending score
    indices = output.argsort()[::-1]
    # Computes prec@i
    total_count_ = np.cumsum(np.ones((len(output), 1)))
    target_ = target[indices]
    ind = target_ == 1
    pos_count_ = np.cumsum(ind)
    total = pos_count_[-1]
    pos_count_[np.logical_not(ind)] = 0
    pp = pos_count_ / total_count_
    precision_at_i_ = np.sum(pp)
    precision_at_i = precision_at_i_ / (total + epsilon)
    return precision_at_i


def mAP(targs, preds):
    """Returns the model's average precision for each class
    Return:
        ap (FloatTensor): 1xK tensor, with avg precision for each class k
    """
    if np.size(preds) == 0:
        return 0
    ap = np.zeros((preds.shape[1]))
    # compute average precision for each class
    for k in range(preds.shape[1]):
        # sort scores
        scores = preds[:, k]
        targets = targs[:, k]
        # compute average precision
        ap[k] = average_precision(scores, targets)
    return 100 * ap.mean()
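

def _demo_map():
    # Editor's hedged usage sketch, not part of the original commit:
    # mAP expects a score matrix and a binary target matrix, both (N, K).
    preds = np.array([[0.9, 0.1], [0.2, 0.8], [0.7, 0.3]])
    targs = np.array([[1, 0], [0, 1], [0, 1]])
    return mAP(targs, preds)  # mean of per-class average precision, in percent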


def pad_sample_seq(x, n_samples):
    # random crop to n_samples, or right-pad with zeros if too short
    if x.size(-1) >= n_samples:
        max_x_start = x.size(-1) - n_samples
        x_start = random.randint(0, max_x_start)
        x = x[x_start: x_start + n_samples]
    else:
        x = F.pad(
            x, (0, n_samples - x.size(-1)), "constant"
        ).data
    return x


def pad_sample_seq_batch(x, n_samples):
    # batched variant: x is (batch, time); crop/pad along the time axis
    # (the original compared n_samples against x.size(0), the batch dimension)
    if x.size(1) >= n_samples:
        max_x_start = x.size(1) - n_samples
        x_start = random.randint(0, max_x_start)
        x = x[:, x_start: x_start + n_samples]
    else:
        x = F.pad(
            x, (0, n_samples - x.size(1)), "constant"
        ).data
    return x


def add_weight_decay(model, weight_decay=1e-5, skip_list=()):
    # split parameters so biases and 1-D (norm) weights skip weight decay
    decay = []
    no_decay = []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        if len(param.shape) == 1 or name in skip_list:
            no_decay.append(param)
        else:
            decay.append(param)
    return [
        {'params': no_decay, 'weight_decay': 0.},
        {'params': decay, 'weight_decay': weight_decay}]
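

def _demo_add_weight_decay(model, lr=0.01):
    # Editor's hedged usage sketch, not part of the original commit:
    # the returned parameter groups plug straight into any torch optimizer,
    # which then applies decay only to the 'decay' group.
    import torch.optim as optim
    param_groups = add_weight_decay(model, weight_decay=5e-4)
    return optim.SGD(param_groups, lr=lr, momentum=0.9)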


def _get_bn_param_ids(net):
    # ids of norm-layer weights/biases and conv/linear biases (e.g. to skip decay)
    bn_ids = []
    for m in net.modules():
        if isinstance(m, torch.nn.BatchNorm1d) or isinstance(m, torch.nn.LayerNorm):
            bn_ids.append(id(m.weight))
            bn_ids.append(id(m.bias))
        elif isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.Linear):
            if m.bias is not None:
                bn_ids.append(id(m.bias))
    return bn_ids


def reduce_tensor(tensor, n):
    # average a tensor across n distributed processes
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= n
    return rt


def gather_tensor(tensor, n):
    # gather a copy of the tensor from each of the n processes
    # (the original allocated with torch.cuda.float(), which is not a valid dtype)
    rt = tensor.clone()
    tensor_list = [torch.zeros_like(rt) for _ in range(n)]
    dist.all_gather(tensor_list, rt)
    return tensor_list


def parse_gpu_ids(gpu_ids):  # list of ints
    s = ''.join(str(x) + ',' for x in gpu_ids)
    s = s.rstrip().rstrip(',')
    return s
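
A quick usage note (editor's sketch, not part of the commit): parse_gpu_ids joins device indices into the comma-separated form that CUDA_VISIBLE_DEVICES expects, so a typical call is:

import os
os.environ['CUDA_VISIBLE_DEVICES'] = parse_gpu_ids([0, 1])  # sets "0,1"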
@@ -0,0 +1,52 @@
from utils import AudioAugs
import os
import pandas as pd
import torch
import torchaudio


def load_audio_files_with_torchaudio(path, file_paths, augmentor=None):
    features = []
    for file_path in file_paths:
        full_path = os.path.join(path, file_path)
        waveform, sample_rate = torchaudio.load(full_path)
        waveform = waveform.mean(dim=0, keepdim=True)  # Ensure mono by averaging channels
        if augmentor is not None:
            augmented_waveform, _ = augmentor(waveform.squeeze(0).numpy())
            waveform = torch.tensor(augmented_waveform, dtype=torch.float32).unsqueeze(0)
        mfccs = torchaudio.transforms.MFCC(sample_rate=sample_rate, n_mfcc=13)(waveform)
        mfccs_mean = mfccs.mean(dim=2).squeeze(0).numpy()
        features.append(mfccs_mean)
    return features


def get_dataset(data_dir, apply_augmentation=True):
    """
    Load dataset and process it for classification task with optional augmentation.
    """
    train_audio_path = os.path.join(data_dir, 'train_mp3s')
    test_audio_path = os.path.join(data_dir, 'test_mp3s')
    label_file = os.path.join(data_dir, 'train_label.txt')

    labels = pd.read_csv(label_file, header=None, names=['file', 'label'])

    train_files = os.listdir(train_audio_path)
    test_files = os.listdir(test_audio_path)

    # Instantiate the augmentor
    augmentor = AudioAugs(k_augs=['flip', 'tshift', 'mulaw'], fs=22050) if apply_augmentation else None

    # Load and process audio files (the original called undefined helpers
    # load_audio_files_with_augmentation / load_audio_files)
    train_features = load_audio_files_with_torchaudio(train_audio_path, train_files, augmentor)
    test_features = load_audio_files_with_torchaudio(test_audio_path, test_files)  # no augmentation for testing

    train_df = pd.DataFrame(train_features)
    train_df['label'] = labels['label'].values[:len(train_features)]  # Make sure labels align correctly

    test_df = pd.DataFrame(test_features)

    return train_df, test_df


# # Example usage
# data_dir = '/scratch/hy2611/ML_Competition/dataset'
# train_data, test_data = get_dataset(data_dir)
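
One interface caveat: get_dataset returns pandas DataFrames, while train_model in the trainer file unpacks its result as (train_loader, val_loader) and iterates over (data, target) batches. A minimal bridge (editor's assumption, not part of the commit; names are illustrative) would wrap the frames in TensorDatasets:

from torch.utils.data import DataLoader, TensorDataset

def loaders_from_frames(train_df, batch_size=32):
    # split the MFCC feature columns from the label column
    x = torch.tensor(train_df.drop(columns=['label']).values, dtype=torch.float32)
    y = torch.tensor(train_df['label'].values, dtype=torch.long)
    n_val = len(x) // 10  # hold out 10% for validation (arbitrary split)
    train_ds = TensorDataset(x[n_val:], y[n_val:])
    val_ds = TensorDataset(x[:n_val], y[:n_val])
    return (DataLoader(train_ds, batch_size=batch_size, shuffle=True),
            DataLoader(val_ds, batch_size=batch_size))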
@@ -0,0 +1,55 @@
from model import SoundNetRaw
from Trainer import train_model
from dataset import get_dataset
import torch
from tqdm import tqdm
import pandas as pd


NUM_CLASSES = 4
EPOCHS = 200
BATCH_SIZE = 32
learning_rate = 0.01
momentum = 0.9
weight_decay = 0.0005
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def predict(model, device, test_loader):
    model.eval()
    predictions = []

    with torch.no_grad():
        for data in tqdm(test_loader, total=len(test_loader), desc="Predicting"):
            images = data[0].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            predictions.extend(predicted.cpu().numpy())

    return predictions


def save_predictions_to_csv(predictions, file_name):
    df = pd.DataFrame({'id': range(len(predictions)), 'category': predictions})
    df.to_csv(file_name, index=False)


if __name__ == '__main__':
    model = SoundNetRaw(
        nf=32,  # Number of filters in the initial convolution layer
        clip_length=66150 // 256,  # Total samples (66150 for 3 s at 22050 Hz) divided by the product of the downsampling factors
        embed_dim=128,  # Embedding dimension
        n_layers=4,  # Number of layers
        nhead=8,  # Number of attention heads
        factors=[4, 4, 4, 4],  # Downsampling factors for each layer
        n_classes=NUM_CLASSES,  # Number of classes
        dim_feedforward=512  # Dimensionality of the feedforward network within the transformer layers
    )
    model.to(device)
    data_dir = '/scratch/hy2611/ML_Competition/dataset'
    train_model(data_dir, model, device, num_classes=NUM_CLASSES, epochs=EPOCHS,
                lr=learning_rate, momentum=momentum, weight_decay=weight_decay)

    torch.save(model, "Limbo.pth")

    _, test_loader = get_dataset(data_dir)
    predictions = predict(model, device, test_loader)
    save_predictions_to_csv(predictions, 'predictions.csv')