Skip to content

Commit

Permalink
initial upload
Browse files Browse the repository at this point in the history
  • Loading branch information
josmithiii committed Jul 22, 2014
1 parent 44d8d8d commit f8c58bb
Show file tree
Hide file tree
Showing 231 changed files with 17,872 additions and 0 deletions.
507 changes: 507 additions & 0 deletions ACE.m

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@ ace
===

Automatic Chord Estimation

The main Matlab/Octave script is ACE.m. It should run as is, printing its results to the terminal and writing them to ./RESULTS/.

See ./ShiSmithAES137.pdf for more info.

32 changes: 32 additions & 0 deletions RESULTS/ACE-eval.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
% ACE on josmbp.local

% fs = 7350
% fmin = 65.4
% binsPerSemitone = 1
% doClass = 1, constantQ = 1

% Each column below has two entries, one for 24 chords and the other for 48 chords
song 0 - sqrt(mag) 1 - mag 2 - mag^2 3 - dB 4 - dB-A 5 - dB-G
fps1 88.318 82.243 89.72 83.645 87.383 80.374 92.523 85.981 90.187 83.645 85.514 78.972
% Real-time ratio = 37.23

Average Accuracy for 24 chords: 88.318 89.72 87.383 92.523 90.187 85.514
Average Accuracy for 48 chords: 82.243 83.645 80.374 85.981 83.645 78.972

CSR Chord Symbol Recall:

24 chords:
Sqrt(Magnitude) : 88.32
Magnitude : 89.72
Magnitude^2 : 87.38
Unweighted dB : 92.52
A-weighted dB : 90.19
G-weighted dB : 85.51

48 chords:
Sqrt(Magnitude) : 82.24
Magnitude : 83.64
Magnitude^2 : 80.37
Unweighted dB : 85.98
A-weighted dB : 83.64
G-weighted dB : 78.97
15 changes: 15 additions & 0 deletions RESULTS/ACE-eval.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
fps1 & 88.3 & 82.2 & 89.7 & 83.6 & 87.4 & 80.4 & 92.5 & 86.0 & 90.2 & 83.6 & 85.5 & 79.0 \\

% Average Accuracy for 24 chords:
88.3 & 89.7 & 87.4 & \textbf{92.5} & 90.2 & 85.5\\
% Average Accuracy for 48 chords:
82.2 & 83.6 & 80.4 & \textbf{86.0} & 83.6 & 79.0\\


% CSR Chord Symbol Recall

% 24 chords, raw scores:
88.3 & 89.7 & 87.4 & 92.5 & 90.2 & 85.5 \\

% 48 chords, raw scores:
82.2 & 83.6 & 80.4 & 86.0 & 83.6 & 79.0 \\
Binary file added ShiSmithAES137.pdf
Binary file not shown.
28 changes: 28 additions & 0 deletions aweighting.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
% Source (and doc): http://en.wikipedia.org/wiki/A-weighting

% This A-weighting should be applied to the linear amplitude spectrum
% (not power spectrum or dB). It is approximately equal to the
% inverse of the 40-phon equal-loudness curve of Fletcher and Munson,
% and also somewhat approximates the ISO 226:2003 update on that.
% Note that 40 phons is pretty quiet.

% The maximum gain is around 1.16 near 2.5 kHz, and for most
% frequencies the gain is less than 1.

function awt = aweighting(fc) % fc = sample frequencies in Hz
% AWEIGHTING  A-weighting gain (linear amplitude) at each frequency in fc.
%
%   awt = aweighting(fc) returns a 1-by-length(fc) row vector holding the
%   A-weighting amplitude gain evaluated at the frequencies fc (Hz).
%   When called with no output argument, the gains are also plotted
%   against fc.

numFreqs = length(fc);
awt = zeros(1,numFreqs);                        % result is always a row vector

% First length(fc) entries of fc, laid out as a row for elementwise math
freqs = reshape(fc(1:numFreqs), 1, numFreqs);

% +2 dB offset: this brings the gain at 1 kHz to (approximately) 1.
% It does NOT cap the curve at 1; the peak gain is slightly above 1.
offset2dB = 10^(2/20);

% Pole terms of the A-weighting magnitude response
lowPoles  = freqs.^2 + 20.6^2;
midPoles  = sqrt((freqs.^2 + 107.7^2) .* (freqs.^2 + 737.9^2));
highPoles = freqs.^2 + 12200^2;

awt(1:numFreqs) = offset2dB * 12200^2 * freqs.^4 ./ ...
                  ( lowPoles .* midPoles .* highPoles );

if nargout==0
  % No output requested: show the weighting curve
  plot(fc,awt,'-*')
  xlabel('Frequency (Hz)');
  ylabel('Amplitude (Linear)');
  title('A-weighting ~ inverse of Fletcher-Munson 40-phon equal-loudness curve');
end
132 changes: 132 additions & 0 deletions buildTemplate.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
% Chord Recognition Project @ CCRMA 2014
% Sub module 4: Build Template
% desc: This is to build the chord template

function Template = buildTemplate(fmin,bpo,numchords,doPlot,doPause)
% BUILDTEMPLATE  Build the chord-template matrix used for template matching.
%
% inputs:
%   fmin      - frequency of first bin (Hz); only used to obtain the pitch
%               labels (via getPitches) when plotting
%   bpo       - number of bins per octave; must be an odd multiple of 12
%               (12, 36, 60, ...) so every semitone has a center bin
%   numchords - 24 (12 major + 12 minor triads) or
%               48 (the 24 triads plus 12 major-triad and 12 minor-triad
%               chords with an added flat seventh)
%   doPlot    - boolean, plot or not
%   doPause   - boolean, pause after the plot or not
% output:
%   Template  - numchords x bpo chord template, one chord per row:
%               rows  1-12  major triads        (C, C#, ..., B)
%               rows 13-24  minor triads
%               rows 25-36  major triad + b7    (numchords == 48 only)
%               rows 37-48  minor triad + b7    (numchords == 48 only)
%
% TO-DO (Gina) : Adding seventh chord?
% Note from Kitty: if bpo=36, then we have 36 bins per octave, not 12.
% So, for example, C major is
%  CCC C#C#C# DDD D#D#D# EEE FFF F#F#F# GGG G#G#G# AAA A#A#A# BBB
%  111 0 0 0  000 0 0 0  111 000 0 0 0  111 0 0 0  000 0 0 0  000
% i.e. each chord tone occupies its center bin plus the neighboring bins
% belonging to the same semitone.

% no_template = 24;     % 12 Major + 12 Minor
% no_template = 48 + 1; % 12 major, 12 minor, plus sevenths, plus 'N' and 'X'
if numchords ~=24 && numchords ~= 48, error('buildTemplate.m: Expected either 24 or 48 chords'); end

% Validate bpo up front: a non-integer or even number of bins per semitone
% would otherwise fail later with an unrelated-looking indexing/size error.
if numel(bpo) ~= 1 || bpo < 12 || mod(bpo,12) ~= 0 || mod(bpo/12,2) ~= 1
  error('buildTemplate.m: Expected bpo to be an odd multiple of 12 (got %g)', bpo);
end

bpst = bpo/12;              % bins per semitone (1,3,5,...)
midshift = floor(bpst/2);   % offset from the last bin of a semitone to its center bin

% Positions of the chord tones on the keyboard (1 = root)
I = 1; bIII = 4; III = 5; V = 8; bVII = 11; % VII = 12; % since the annotations do not define whether they are maj or min 7ths

% One entry per chord family: {chord tones, weight per chord tone}.
% JOS CHOSE 3/4 TO EQUALIZE SPECTRALLY FLAT TRIAD AND SEVENTH CHORDS
% Note that this only makes sense for perceptually flattened spectra/chroma
families = { [I,III,V],       1   ; ...  % major triads
             [I,bIII,V],      1   ; ...  % minor triads
             [I,III,V,bVII],  3/4 ; ...  % major triad + flat seventh
             [I,bIII,V,bVII], 3/4 };     % minor triad + flat seventh
nFamilies = numchords/12;   % 2 for 24 chords, 4 for 48 chords

if bpst>1
  % Each "matched filter" will be more than a single key.
  % Right-multiplying a row by this symmetric Toeplitz matrix spreads every
  % center bin uniformly (circularly) over all bpst bins of its semitone.
  % Note: a sampled Gaussian could be installed by the same construct, e.g.
  %   toeplitz([1,.3,zeros(1,bpo-3),.3])   for bpst=3
  % (0.3 ~ Gaussian at 3/4 sigma => 3/2 sigma at edge). For simplicity we
  % use a uniform weighting instead.
  spread = toeplitz([ones(1,(bpst+1)/2),zeros(1,bpo-bpst),ones(1,(bpst-1)/2)]);
end

Template = zeros(numchords,bpo); % Create empty template
for k = 1:nFamilies
  % Root-position chord on C: weight at the center bin of each chord tone
  rootRow = zeros(1,bpo);
  rootRow(families{k,1} * bpst - midshift) = families{k,2};
  if bpst>1
    rootRow = rootRow * spread;
  end
  % Remaining 11 roots: circular right-shift by one semitone per row
  firstRow = 12*(k-1);
  for n = 1:12
    Template(firstRow+n,:) = circshift(rootRow,[0 (n-1)*bpst]);
  end
end
% 'N' or 'X'
%    Template(49,:) = zeros(1, 12);

if doPlot
  screensize = get(0,'screensize');
  figPos = screensize([3,4,3,4]).*[0.6 0 0.4 0.4]; % upper right corner
  figurepos(figPos);
  [pitches_M,pitches_m,pitches_M7,pitches_m7] = getPitches(fmin);

  % Plot the Binary Template
  imagesc(Template);
  % mesh(Template); % kind of fun
  title('Binary Template with Gaussian Distribution')
  set(gca,'XTick',1:bpst:bpo);      % one tick per semitone
  set(gca,'XTickLabel',pitches_M);
  set(gca,'YTick',1:numchords);
  set(gca,'YDir','normal');
  ylabel([num2str(numchords),' templates']);
  if doPause, disp('PAUSING'); pause; end
end
125 changes: 125 additions & 0 deletions chordEstimate.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
% Chord Recognition Project @ CCRMA 2014
% Sub module 5: Chord Estimation
% desc: A basic chord estimation based on the fitness matrix

function [myChord Fitness_Matrix] = chordEstimate(myChroma,class,fmin,numchords,Template,doPlot,doPause)
% CHORDESTIMATE  Frame-by-frame chord estimate by template matching.
%
% input:  myChroma  - the chromagram (bins x frames)
%         class     - chromagram classification versus time:
%                     1 = chord, 2 = silence, 3 = noise
%         fmin      - passed to getPitches to obtain plot labels (plots only)
%         numchords - number of chord templates, 24 or 48 (plots only)
%         Template  - chord template (chords x bins)
%         doPlot    - boolean, plot or not
%         doPause   - boolean, pause after each plot or not
% output: myChord        - index of the best-scoring template for each frame
%         Fitness_Matrix - chord scores versus time (chords x frames)
%
% Note from Kitty: the score used here is the dot product of each template
% with each chroma frame. An alternative is the Euclidean distance between
% Template(x,:) and chroma frame y, picking the minimum instead of the
% maximum, e.g. (see "Matlab array manipulation tips and tricks" 10.4, Acklam):
%
%   X = Template;          % numchords x bins
%   Y = chroma_smooth';    % frames x bins
%   m = 24; n = frameM;
%   Distance_Matrix = sqrt(sum(abs( repmat(permute(X, [1 3 2]), [1 n 1]) ...
%                          - repmat(permute(Y, [3 1 2]), [m 1 1]) ).^2, 3));
%   [~, myChord] = min(Distance_Matrix);

%% Fitness matrix: (chords x bins) * (bins x frames)
Fitness_Matrix = Template * myChroma;

%% Flatten the scores of every frame that is not classified as a chord
% Such frames get the global minimum score in every row, so all chords tie
% there and the max below returns index 1 for them.
floorScore = min(min(Fitness_Matrix));
frameClass = class(1:length(class));
for m = find(frameClass(:).' ~= 1)
  Fitness_Matrix(:,m) = floorScore;
end

%% Pick the template that maximizes the dot product in each frame
[~, myChord] = max(Fitness_Matrix);

%% Plots
if doPlot

  % --- Figure 1: fitness matrix for interactive perusal ---
  screensize = get(0,'screensize');
  figPos = screensize([3,4,3,4]).*[0.6 0 0.4 0.4]; % upper right corner
  figurepos(figPos);
  if size(Fitness_Matrix,2)>2
    mesh(Fitness_Matrix);
  else
    plot(Fitness_Matrix);
  end
  title('Fitness Matrix = TemplateMatrix * Chromagram = Chord Scores vs Time');
  xlabel('Time (frames)');
  ylabel('Major-Minor Chord Index');
  zlabel('Magnitude (Measure of Fit)');
  if doPause, disp('PAUSING'); pause; end

  % --- Figure 2: fitness image with the estimated chord path on top ---
  screensize = get(0,'screensize');
  figPos = screensize([3,4,3,4]).*[0.6 0 0.4 0.4]; % upper right corner
  figurepos(figPos);

  [pitches_M, pitches_m, pitches_M7, pitches_m7] = getPitches(fmin);
  % Y-axis labels: one per template, plus a final 'N' entry
  switch numchords
    case 24
      chordn = [pitches_M;pitches_m; 'N '];
    case 48
      chordn = [pitches_M; pitches_m; pitches_M7; pitches_m7; 'N '];
  end

  imagesc(Fitness_Matrix); grid('on');
  hold on;
  plot(myChord,'-*k','linewidth',3); % Overlay estimated chord
  title('Fitness Matrix = TemplateMatrix * Chromagram = Chord Scores vs Time');
  set(gca,'YDir','normal');
  set(gca,'YTick',1:numchords+1);
  set(gca,'YTickLabel',chordn);
  xlabel('Time (frames)');
  if doPause, disp('PAUSING'); pause; end
  hold off
end
Loading

0 comments on commit f8c58bb

Please sign in to comment.