-
Notifications
You must be signed in to change notification settings - Fork 2
/
base_features.py
182 lines (129 loc) · 4.97 KB
/
base_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
from __future__ import division, print_function
import os
import numpy as np
import pandas as pd
import sys
import librosa
import vamp
import utils
""" This module provides an interface to several existing audio feature time
series extractors.
Requires librosa and the vamp Python module to be installed (both are imported
at module level); the HPCP and melody extractors also need their Vamp plug-ins.
"""
def compute_and_write(audio_dir, data_dir, features=None):
    """Compute frame-based features for all audio files in a folder.

    Every entry of `audio_dir` whose name ends in '.wav' or '.mp3' is loaded
    as mono audio with librosa. Each extractor in `features` is then applied
    to the signal and its output is handed to `utils.write_feature`.

    Args:
        audio_dir (str): where to find audio files
        data_dir (str): where to write features
        features (dict): dictionary with feature extraction functions, indexed
            by feature name.
            Each function is called as `func(x, sr)` and should return a
            1d-array of frame times and an array of feature frames.
            The feature name is passed to `utils.write_feature` together
            with `data_dir` and the track id (per the original notes it is
            used as the subdirectory to which feature CSVs are written).
            Defaults to this module's MFCC, HPCP, melody, beat and onset
            extractors.
    """
    if features is None:
        features = {'mfcc': get_mfcc,
                    'hpcp': get_hpcp, 'melody': get_melody,
                    'beats': get_beats, 'onsets': get_onsets}

    for filename in os.listdir(audio_dir):
        # Extension match is case-sensitive, as before.
        if not filename.endswith(('.wav', '.mp3')):
            continue

        print("Computing features for file {}...".format(filename))
        x, sr = librosa.load(os.path.join(audio_dir, filename), mono=True)

        # Strip only the final extension, so that a dotted name such as
        # 'artist.title.wav' keeps its full stem ('artist.title') instead of
        # collapsing to 'title' and colliding with other files.
        track_id = filename.rsplit('.', 1)[0]

        for feature, func in features.items():
            t, X = func(x, sr)
            utils.write_feature([t, X], [data_dir, feature, track_id])
def get_mfcc(x, sr, n_mfcc=20):
    """Compute MFCC features from raw audio, using librosa.

    Librosa must be installed.

    Args:
        x (1d-array): audio signal, mono
        sr (int): sample rate
        n_mfcc (int): number of coefficients to retain

    Returns:
        1d-array: time vector (one entry per frame, in seconds)
        2d-array: MFCC features, one row per frame and one column per
            coefficient
    """
    # Use one hop length for both the analysis and the time axis so the two
    # cannot drift apart.
    hop_length = 512

    # Request n_mfcc from librosa directly; slicing a default-sized result
    # would silently cap the output at librosa's default of 20 coefficients.
    # Keyword arguments are used because recent librosa releases no longer
    # accept the audio signal positionally.
    mfcc = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=n_mfcc,
                                hop_length=hop_length)
    n_frames = mfcc.shape[1]
    t = librosa.frames_to_time(np.arange(n_frames), sr=sr,
                               hop_length=hop_length)
    return t, mfcc.T
def get_hpcp(x, sr, n_bins=12, f_min=55, f_ref=440.0, min_magn=-100):
    """Extract HPCP (chroma) frames from raw audio via the HPCP Vamp plugin.

    Needs Vamp, the vamp python module and the 'vamp-hpcp-mtg' plug-in.

    Args:
        x (1d-array): audio signal, mono
        sr (int): sample rate
        n_bins (int): number of chroma bins
        f_min (float): minimum frequency
        f_ref (float): A4 tuning frequency
        min_magn (float): minimum magnitude for peak detection, in dB

    Returns:
        1d-array: time vector
        2d-array: HPCP features
    """
    plugin_params = {
        'LF': f_min,
        'nbins': n_bins,
        'reff0': f_ref,
        'peakMagThreshold': min_magn,
    }
    output = vamp.collect(x, sr, 'vamp-hpcp-mtg:MTG-HPCP',
                          parameters=plugin_params)
    step, chroma = output['matrix']

    # Frame times start 8 hops in.
    # NOTE(review): presumably this compensates for the plug-in's analysis
    # window latency -- confirm against the plug-in documentation.
    frame_index = np.arange(len(chroma)) + 8
    return float(step) * frame_index, chroma
def get_melody(x, sr, f_min=55, f_max=1760, min_salience=0.0, unvoiced=True):
    """Extract main melody from raw audio using the Melodia Vamp plugin.

    Vamp, vamp python module and plug-in must be installed.

    Args:
        x (np.array): audio signal, mono
        sr (int): sample rate
        f_min (float): minimum frequency
        f_max (float): maximum frequency
        min_salience (float): passed to the plug-in as 'minpeaksalience'
        unvoiced (bool): if True, negative f0 values returned by the plug-in
            are kept (by absolute value) and only zeros are masked; if
            False, every non-positive f0 value is masked.
            NOTE(review): presumably Melodia flags unvoiced-frame estimates
            with negative frequencies -- confirm against the plug-in docs.

    Return:
        1d-array: time vector
        2d-array: main melody as MIDI note numbers (A4 = 440 Hz = 69), one
            column, with NaN for masked frames
    """
    plugin = 'mtg-melodia:melodia'
    params = {'minfqr': f_min, 'maxfqr': f_max,
              'minpeaksalience': min_salience}
    data = vamp.collect(x, sr, plugin, parameters=params)
    vamp_hop, f0 = data['vector']

    # Mask frames without a usable pitch with NaN so that they propagate
    # through the logarithm below.
    if unvoiced:
        f0 = np.abs(f0)
        f0[f0 == 0] = np.nan
    else:
        f0[f0 <= 0] = np.nan

    # Hz -> MIDI note number.
    melody = 69 + 12 * np.log2(np.abs(f0) / 440)
    melody = melody[:, np.newaxis]

    # Frame times start 8 hops in.
    # NOTE(review): presumably plug-in latency compensation -- confirm.
    t = float(vamp_hop) * (8 + np.arange(len(melody)))
    return t, melody
def get_beats(x, sr):
    """Track beats in an audio excerpt, using librosa's standard
    beat tracker.

    Args:
        x (1d-array): audio signal, mono
        sr (int): sample rate

    Returns:
        1d-array: beat times, excluding the last detected beat
        1d-array: inter-beat intervals, i.e. the difference between each
            beat time and the next (same length as the time vector)
    """
    # Pass the signal by keyword: recent librosa releases no longer accept
    # it positionally.
    _, beat_frames = librosa.beat.beat_track(y=x, sr=sr)
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)

    # The last beat has no successor, so it gets no interval and is dropped
    # from the time vector.
    t = beat_times[:-1]
    beat_intervals = np.diff(beat_times)
    return t, beat_intervals
def get_onsets(x, sr):
    """Compute inter-onset intervals (IOI) from audio, using librosa.

    Args:
        x (1d-array): audio signal, mono
        sr (int): sample rate

    Returns:
        1d-array: onset times, excluding the last detected onset
        1d-array: inter-onset intervals, i.e. the difference between each
            onset time and the next (same length as the time vector)
    """
    # Pass the signal by keyword: recent librosa releases no longer accept
    # it positionally.
    onset_frames = librosa.onset.onset_detect(y=x, sr=sr)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr)

    # The last onset has no successor, so it gets no interval and is dropped
    # from the time vector.
    t = onset_times[:-1]
    onset_intervals = np.diff(onset_times)
    return t, onset_intervals
if __name__ == '__main__':
    # Command-line entry point: first argument is the audio folder, second
    # is the folder features are written to. Extra arguments are ignored.
    if len(sys.argv) < 3:
        # Exit with a readable usage message instead of a bare IndexError.
        sys.exit('usage: python {} <audio_dir> <data_dir>'.format(
            sys.argv[0]))
    compute_and_write(sys.argv[1], sys.argv[2])