-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
44d8d8d
commit f8c58bb
Showing
231 changed files
with
17,872 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
% ACE on josmbp.local | ||
|
||
% fs = 7350 | ||
% fmin = 65.4 | ||
% binsPerSemitone = 1 | ||
% doClass = 1, constantQ = 1 | ||
|
||
% Each column below has two entries, one for 24 chords and the other for 48 chords | ||
song 0 - sqrt(mag) 1 - mag 2 - mag^2 3 - dB 4 - dB-A 5 - dB-G | ||
fps1 88.318 82.243 89.72 83.645 87.383 80.374 92.523 85.981 90.187 83.645 85.514 78.972 | ||
% Real-time ratio = 37.23 | ||
|
||
Average Accuracy for 24 chords: 88.318 89.72 87.383 92.523 90.187 85.514 | ||
Average Accuracy for 48 chords: 82.243 83.645 80.374 85.981 83.645 78.972 | ||
|
||
CSR Chord Symbol Recall: | ||
|
||
24 chords: | ||
Sqrt(Magnitude) : 88.32 | ||
Magnitude : 89.72 | ||
Magnitude^2 : 87.38 | ||
Unweighted dB : 92.52 | ||
A-weighted dB : 90.19 | ||
G-weighted dB : 85.51 | ||
|
||
48 chords: | ||
Sqrt(Magnitude) : 82.24 | ||
Magnitude : 83.64 | ||
Magnitude^2 : 80.37 | ||
Unweighted dB : 85.98 | ||
A-weighted dB : 83.64 | ||
G-weighted dB : 78.97 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
fps1 & 88.3 & 82.2 & 89.7 & 83.6 & 87.4 & 80.4 & 92.5 & 86.0 & 90.2 & 83.6 & 85.5 & 79.0 \\ | ||
|
||
% Average Accuracy for 24 chords: | ||
88.3 & 89.7 & 87.4 & \textbf{92.5} & 90.2 & 85.5\\ | ||
% Average Accuracy for 48 chords: | ||
82.2 & 83.6 & 80.4 & \textbf{86.0} & 83.6 & 79.0\\ | ||
|
||
|
||
% CSR Chord Symbol Recall | ||
|
||
% 24 chords, raw scores: | ||
88.3 & 89.7 & 87.4 & 92.5 & 90.2 & 85.5 \\ | ||
|
||
% 48 chords, raw scores: | ||
82.2 & 83.6 & 80.4 & 86.0 & 83.6 & 79.0 \\ |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
% Source (and doc): http://en.wikipedia.org/wiki/A-weighting | ||
|
||
% This A-weighting should be applied to the linear amplitude spectrum | ||
% (not power spectrum or dB). It is approximately equal to the | ||
% inverse of the 40-phon equal-loudness curve of Fletcher and Munson, | ||
% and also somewhat approximates the ISO 226:2003 update on that. | ||
% Note that 40 phons is pretty quiet. | ||
|
||
% The maximum gain is around 1.16 near 2.5 kHz, and for most | ||
% frequencies the gain is less than 1. | ||
|
||
function awt = aweighting(fc)
%AWEIGHTING  Linear-amplitude A-weighting gain at the given frequencies.
%
%   awt = aweighting(fc) evaluates the A-weighting magnitude response at
%   every frequency in fc (Hz) and returns the gains as a 1-by-numel(fc)
%   row vector, regardless of the shape of fc (matrices are read in
%   column-major order).  The gains are meant to multiply a linear
%   amplitude spectrum, not a power spectrum or a dB spectrum.
%
%   aweighting(fc) with no output argument additionally plots the curve.
%
%   input:  fc  - sample frequencies in Hz (scalar, vector or matrix)
%   output: awt - row vector of linear gains, one per element of fc

f2 = reshape(fc, 1, []).^2;      % squared frequencies, forced into a row

% +2 dB offset: brings the gain to ~1 (0 dB) at 1 kHz.  It does NOT make
% the maximum equal to 1; the peak is about 1.16 near 2.5 kHz.
scl = 10^(2/20);

% Pole frequencies (Hz) of the A-weighting transfer function.
p1 = 20.6;  p2 = 107.7;  p3 = 737.9;  p4 = 12200;

awt = scl * p4^2 * f2.^2 ./ ...
      ( (f2 + p1^2) .* sqrt((f2 + p2^2) .* (f2 + p3^2)) .* (f2 + p4^2) );

if nargout==0
  % Interactive use: show the weighting curve.
  plot(reshape(fc, 1, []),awt,'-*')
  xlabel('Frequency (Hz)');
  ylabel('Amplitude (Linear)');
  title('A-weighting ~ inverse of Fletcher-Munson 40-phon equal-loudness curve');
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
% Chord Recognition Project @ CCRMA 2014 | ||
% Sub module 4: Build Template | ||
% desc: This is to build the chord template | ||
|
||
function Template = buildTemplate(fmin,bpo,numchords,doPlot,doPause)
%BUILDTEMPLATE  Build the chord-template matrix (one row per chord).
%
% inputs:
%   fmin      - frequency of first bin (Hz); only forwarded to getPitches
%               to obtain axis labels when plotting
%   bpo       - number of bins per octave; must be 12 times an odd
%               integer (12, 36, 60, ...) so every semitone has a
%               well-defined center bin
%   numchords - 24 (rows 1-12 major, 13-24 minor) or 48 (adds rows 25-36
%               major triad + flat seventh and rows 37-48 minor triad +
%               flat seventh)
%   doPlot    - boolean, plot or not        (optional, default false)
%   doPause   - boolean, pause or not       (optional, default false)
% output:
%   Template  - numchords-by-bpo chord template
%
% Layout note (from Kitty): with bpo=36 there are 3 bins per semitone,
%   CCC C#C#C# DDD D#D#D# EEE FFF F#F#F# GGG G#G#G# AAA A#A#A# BBB
%   111 0 0 0  000 0 0 0  111 000 0 0 0  111 0 0 0  000 0 0 0  000
% i.e. C major covers bins 1-3, 13-15 and 22-24.  Every bin belonging to
% a chord tone gets the same (uniform) weight.  Earlier versions used a
% sampled Gaussian (0.3 1 0.3) instead; that could be restored by
% changing the smoothing kernel in chordRows below.
%
% Within each group of 12 rows, row k is the first row circularly
% shifted right by (k-1) semitones.
%
% A 49th 'N'/'X' (no-chord) row is not generated.

if nargin < 4, doPlot  = false; end
if nargin < 5, doPause = false; end

if numchords ~=24 && numchords ~= 48, error('buildTemplate.m: Expected either 24 or 48 chords'); end

bpst = bpo/12;                   % bins per semitone (1,3,5,...)
if ~isscalar(bpo) || bpst < 1 || bpst ~= floor(bpst) || mod(bpst,2) ~= 1
  error('buildTemplate.m: Expected bpo to be an odd multiple of 12 (12, 36, 60, ...)');
end

% Positions of the chord tones on a 12-key keyboard (1 = root).
I = 1; bIII = 4; III = 5; V = 8; bVII = 11;
% bVII is used for both seventh-chord families since the annotations do
% not define whether they are maj or min 7ths.

Template = zeros(numchords,bpo);
Template( 1:12,:) = chordRows([I, III,V], 1, bpo, bpst);   % major
Template(13:24,:) = chordRows([I,bIII,V], 1, bpo, bpst);   % minor

if numchords == 48
  % JOS CHOSE 3/4 TO EQUALIZE SPECTRALLY FLAT TRIAD AND SEVENTH CHORDS
  % (3 tones * 1 == 4 tones * 3/4).  Note that this only makes sense for
  % perceptually flattened spectra/chroma.
  Template(25:36,:) = chordRows([I, III,V,bVII], 3/4, bpo, bpst);
  Template(37:48,:) = chordRows([I,bIII,V,bVII], 3/4, bpo, bpst);
end

if doPlot
  screensize = get(0,'screensize');
  figPos = screensize([3,4,3,4]).*[0.6 0 0.4 0.4]; % upper right corner
  figurepos(figPos);
  [pitches_M,pitches_m,pitches_M7,pitches_m7] = getPitches(fmin);

  % Plot the Binary Template
  % (the title still says "Gaussian" although the weighting is uniform)
  imagesc(Template);
  % mesh(Template); % kind of fun
  title('Binary Template with Gaussian Distribution')
  set(gca,'XTick',1:round(bpo/12):bpo);
  set(gca,'XTickLabel',pitches_M);
  set(gca,'YTick',1:numchords);
  set(gca,'YDir','normal');
  ylabel([num2str(numchords),' templates']);
  if doPause, disp('PAUSING'); pause; end
end


function rows = chordRows(degrees, weight, bpo, bpst)
% Build the 12 transpositions of one chord type.
%   degrees - keyboard positions (1..12) of the chord tones for root C
%   weight  - value assigned to every bin belonging to a chord tone
%   bpo     - bins per octave
%   bpst    - bins per semitone (odd integer)
% Returns a 12-by-bpo matrix; row k is the chord transposed up k-1
% semitones.

root = zeros(1,bpo);
% Center bin of semitone d is d*bpst - floor(bpst/2).
root(degrees*bpst - floor(bpst/2)) = weight;
if bpst > 1
  % Spread each center over its whole semitone.  The first row of the
  % Toeplitz matrix is circularly symmetric, so the matrix is circulant
  % and the product is a circular moving sum of width bpst.
  halfw = (bpst-1)/2;
  root = root * toeplitz([ones(1,halfw+1),zeros(1,bpo-bpst),ones(1,halfw)]);
end

rows = zeros(12,bpo);
for k = 1:12
  rows(k,:) = circshift(root,[0 (k-1)*bpst]);  % circ-right-shift per semitone
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
% Chord Recognition Project @ CCRMA 2014 | ||
% Sub module 5: Chord Estimation | ||
% desc: A basic chord estimation based on the fitness matrix | ||
|
||
function [myChord, Fitness_Matrix] = chordEstimate(myChroma,class,fmin,numchords,Template,doPlot,doPause)
%CHORDESTIMATE  Pick the best-matching chord template for every frame.
%
% usage: [myChord, Fitness_Matrix] = ...
%            chordEstimate(myChroma,class,fmin,numchords,Template,doPlot,doPause)
% input:  myChroma  - the chromagram, bins-by-frames
%         class     - chromagram classification versus time:
%                     1 = chord, 2 = silence, 3 = noise (one entry per frame)
%         fmin      - frequency of first bin (Hz); only forwarded to
%                     getPitches for axis labels when plotting
%         numchords - number of templates (24 or 48); only used for plotting
%         Template  - chord template, numchords-by-bins
%         doPlot    - boolean, plot or not            (optional, default false)
%         doPause   - boolean, pause after each plot  (optional, default false)
% output: myChord        - row vector, index of the winning template per frame
%         Fitness_Matrix - Template * myChroma with non-chord frames
%                          flattened (see below), numchords-by-frames
%
% Note from Kitty: I've implemented two ways, one is by using the dot product,
% another way is to use the Euclidean distance.  I think they're pretty
% much the same thing.  But maybe we can test if we have time?

if nargin < 6, doPlot  = false; end
if nargin < 7, doPause = false; end

%% Create Fitness Matrix
% Dot product of every template with every chroma frame:
%   Templates:  numchords * bpo
%   Chromagram: bpo * frames
% (A disabled experiment that kept only the nKeep largest chroma bins of
% each frame and replaced the rest by the global mean used to live here;
% it was never executed and has been removed.)
Fitness_Matrix = Template * myChroma;

% Frames classified as silence (2) or noise (3) get every chord score set
% to the global minimum of the matrix (not to 0).  All rows are then tied,
% so max() below reports template 1 for those frames.
% NOTE(review): presumably callers use class to map such frames to 'N'.
nFrames  = size(Fitness_Matrix,2);
fmMin    = min(Fitness_Matrix(:));
notChord = find(reshape(class,1,[]) ~= 1);
% Ignore class entries beyond the last frame; assigning to them would
% silently append columns to the fitness matrix.
notChord = notChord(notChord <= nFrames);
if ~isempty(notChord)
  Fitness_Matrix(:,notChord) = fmMin;
end

if doPlot
  screensize = get(0,'screensize');
  figPos = screensize([3,4,3,4]).*[0.6 0 0.4 0.4]; % upper right corner
  figurepos(figPos);
  % Plot the fitness matrix for interactive perusal:
  if size(Fitness_Matrix,2)>2
    mesh(Fitness_Matrix);
  else
    plot(Fitness_Matrix);
  end
  title('Fitness Matrix = TemplateMatrix * Chromagram = Chord Scores vs Time');
  xlabel('Time (frames)');
  ylabel('Major-Minor Chord Index');
  zlabel('Magnitude (Measure of Fit)');
  if doPause, disp('PAUSING'); pause; end
end

% Option 1: Pick up the template that maximizes the dot product.
% The explicit dimension keeps this a per-frame (column-wise) maximum.
[~, myChord] = max(Fitness_Matrix,[],1);

%% Option 2: Calculating Euclidean Distance (not enabled)
% Create Distance Matrix, find my chord sequence: mypath
%
% Calculate Euclidean Distance
% Creating a Distance Matrix of size 24 * num_of_frames
% such that D(x,y) represents the Euclidean Distance
% between Template(x,:) and Chroma_Matrix(:,y)
% Need to flip Chroma_Matrix (from 36*frame to frame*36)
% (between each frame of chroma matrix and each row in Template Matrix)
% code reference: "Matlab array manipulation tips and tricks" 10.4 (Acklam)
%
% X = Template;          % Size: numchords * 36;
% Y = chroma_smooth';    % Size: frame * 36;
% m = 24; n = frameM;    % define m,n
% Distance_Matrix = sqrt(sum(abs( repmat(permute(X, [1 3 2]), [1 n 1]) ...
%  - repmat(permute(Y, [3 1 2]), [m 1 1]) ).^2, 3));
%
% Calculate the template that has the minimum distance for each frame
% [~, myChord] = min(Distance_Matrix);
% size(Fitness_Matrix)

%% Plot

if doPlot

  screensize = get(0,'screensize');
  figPos = screensize([3,4,3,4]).*[0.6 0 0.4 0.4]; % upper right corner
  figurepos(figPos);

  % Y-axis labels: one per template plus a trailing 'N ' (no chord) entry.
  [pitches_M, pitches_m, pitches_M7, pitches_m7] = getPitches(fmin);
  if numchords == 24,
    chordn = [pitches_M;pitches_m; 'N '];
  elseif numchords == 48,
    chordn = [pitches_M; pitches_m; pitches_M7; pitches_m7; 'N '];
  else
    error('chordEstimate.m: Expected either 24 or 48 chords');
  end;

  % Plot the Fitness Matrix
  imagesc(Fitness_Matrix); grid('on');
  hold on;
  plot(myChord,'-*k','linewidth',3); % Overlay estimated chord
  title('Fitness Matrix = TemplateMatrix * Chromagram = Chord Scores vs Time');
  set(gca,'YDir','normal');
  set(gca,'YTick',1:numchords+1);
  %set(gca,'YTick',1:numchords);
  set(gca,'YTickLabel',chordn);
  % M = size(Fitness_Matrix,2); % M: number of frames
  % set(gca,'XTick',1:100:M-1);
  % secs = ((1:100:M)-1) * R/fs;
  % set(gca,'XTickLabel',num2str(round(secs'))); %Round to seconds
  % xlabel('Time (s)');
  xlabel('Time (frames)');
  if doPause, disp('PAUSING'); pause; end
  hold off
end
Oops, something went wrong.