Skip to content

Commit

Permalink
Corrections as requested by Anthony.
Browse files Browse the repository at this point in the history
Issue #447 I have also written an extensive class summary for OneBinTrackAlgorithm i.e. the algorithm used to find whistles.
  • Loading branch information
towsey committed May 19, 2021
1 parent 2aed2b1 commit 42552f8
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 71 deletions.
4 changes: 2 additions & 2 deletions src/AudioAnalysisTools/CommonParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ public abstract class CommonParameters : IValidatableObject

public virtual IEnumerable<ValidationResult> Validate(ValidationContext validationContext)
{
//yield return this.MinHertz.ValidateNotNull(nameof(this.MinHertz));
//yield return this.MaxHertz.ValidateNotNull(nameof(this.MaxHertz));
yield return this.MinHertz.ValidateNotNull(nameof(this.MinHertz));
yield return this.MaxHertz.ValidateNotNull(nameof(this.MaxHertz));
yield return this.ValidateLessThan(this.MinHertz, nameof(this.MinHertz), this.MaxHertz, nameof(this.MaxHertz));
yield return this.DecibelThresholds.ValidateNotNull(nameof(this.DecibelThresholds));
yield return this.DecibelThresholds.ValidateNotEmpty(nameof(this.DecibelThresholds));
Expand Down
18 changes: 0 additions & 18 deletions src/AudioAnalysisTools/Tracks/MinAndMaxBandwidthParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,6 @@ namespace AnalysisPrograms.Recognizers.Base

public class MinAndMaxBandwidthParameters : CommonParameters
{
/*
/// <summary>
/// Gets or sets the bottom bound of a search band. Units are Hertz.
/// A search band is the frequency band within which an algorithm searches for a particular track or event.
/// This is to be carefully distinguished from the top and bottom bounds of a specific event.
/// A search band consists of two parallel lines/freqeuncy bins.
/// An event is represented by a rectangle.
/// Events will/should always lie within a search band. There may be exception in edge cases, i.e. where an event sits on a search bound.
/// </summary>
public int? SearchbandMinHertz { get; set; }
/// <summary>
/// Gets or sets the the top bound of a search band. Units are Hertz.
/// A search band is the frequency band within which an algorithm searches for a particular track or event.
/// </summary>
public int? SearchbandMaxHertz { get; set; }
*/

/// <summary>
/// Gets or sets the minimum allowed bandwidth of a spectrogram track or event, units = Hertz.
/// </summary>
Expand Down
95 changes: 64 additions & 31 deletions src/AudioAnalysisTools/Tracks/OnebinTrackAlgorithm.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,26 @@ namespace AudioAnalysisTools.Tracks
using TowseyLibrary;
using TrackType = AudioAnalysisTools.Events.Tracks.TrackType;

/// <summary>
/// This class searches a spectrogram for whistles, that is, for tones or spectral peaks that persist in one frequency bin.
/// In practice, the whistles of birds and other natural sources do not occupy a single frequency bin,
/// although this statement is confounded by the choice of recording sample rate and frame size.
/// But typically, a bird whistle spreads itself across three or more frequency bins using typical values for SR etc.
/// In this class, we make an assumption about the spectral profile of a whistle and the user is expected to find the appropriate
/// sample rate, frame size and frame step such that the target whistle is detected using the profile.
/// We define a whistle profile that is 11 bins wide. The actual whistle occupies the centre three bins, i.e. bins -1, 0, +1.
/// Bins -2 and +2 are ignored to allow for some flexibility in getting the right combination of sample rate, frame size and frame step.
/// To establish that the centre three bins contain a spectral peak (i.e. are part of a potential whistle),
/// we define top and bottom sidebands, each of width three bins.
/// These are used to establish a baseline intensity which must be less than that of the centre three bins.
/// The bottom sideband = bins -3, -4, -5. The top sideband = bins +3, +4, +5.
/// Defining a whistle this way introduces edge effects at the top and bottom of the spectrogram.
/// In the case of the low frequency edge, in order to get as close as possible to frequency bin zero, we do not incorporate a bottom sideband into the calculations.
/// Also note that a typical bird whistle is not exactly a pure tone. It typically fluctuates slightly from one frequency bin to an adjacent bin and back.
/// Consequently a final step in this whistle detection algorithm is to merge adjacent whistle tracks.
/// The algorithm is not perfect but it does detect constant tone sounds. This algorithm is designed so as not to pick up chirps,
/// i.e. gradually rising and falling tones. However, here again the right choice of SR, frame size and frame step is important.
/// </summary>
public static class OnebinTrackAlgorithm
{
private static readonly ILog Log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);
Expand Down Expand Up @@ -54,36 +74,41 @@ public static (List<EventCommon> Events, List<Plot> DecibelPlots) GetOnebinTrack
}

/// <summary>
/// This method returns whistle (spectral peak) tracks enclosed in spectral events.
/// This method returns whistle (spectral peak) tracks enclosed as spectral events.
/// It averages dB log values incorrectly but it is faster than doing many log conversions.
/// </summary>
/// <param name="sonogram">The spectrogram to be searched.</param>
/// <param name="spectrogram">The spectrogram to be searched.</param>
/// <param name="parameters">The parameters that determine the search.</param>
/// <param name="segmentStartOffset">Enables assignment of a start time (relative to recording) to any valid event.</param>
/// <param name="decibelThreshold">The threshold for detection of a track.</param>
/// <returns>A list of acoustic events containing whistle tracks.</returns>

public static (List<EventCommon> ListOfevents, double[] CombinedIntensityArray) GetOnebinTracks(
SpectrogramStandard sonogram,
SpectrogramStandard spectrogram,
OnebinTrackParameters parameters,
TimeSpan segmentStartOffset,
double decibelThreshold)
{
var sonogramData = sonogram.Data;
int frameCount = sonogramData.GetLength(0);
int binCount = sonogramData.GetLength(1);
int nyquist = sonogram.NyquistFrequency;
var spectroData = spectrogram.Data;
int frameCount = spectroData.GetLength(0);
int binCount = spectroData.GetLength(1);
int nyquist = spectrogram.NyquistFrequency;
double binWidth = nyquist / (double)binCount;

// set lower frequency bins of the search band
int minSearchBin = (int)Math.Floor(parameters.SearchbandMinHertz.Value / binWidth);
// calculate the frequency bin for bottom of search band
// Allow for the lower bin of the whistle centre band (bin - 1): the search cannot start below bin 1.
int minSearchBin = (int)Math.Floor(parameters.MinHertz.Value / binWidth);
if (minSearchBin < 1)
{
minSearchBin = 1;
}

// set top search bin allowing for the top sideband.
int maxSearchBin = (int)Math.Floor(parameters.SearchbandMaxHertz.Value / binWidth) - 1;
if (maxSearchBin > binCount - 6)
// calculate the frequency bin for top of search band, allowing for the top sideband.
// see class summary above.
int topSideband = 6;
int maxSearchBin = (int)Math.Floor(parameters.MaxHertz.Value / binWidth) - 1;
if (maxSearchBin > binCount - topSideband)
{
maxSearchBin = binCount - 6;
maxSearchBin = binCount - topSideband;
}

// get max and min duration for the whistle event.
Expand All @@ -92,51 +117,57 @@ public static (List<EventCommon> ListOfevents, double[] CombinedIntensityArray)

var converter = new UnitConverters(
segmentStartOffset: segmentStartOffset.TotalSeconds,
sampleRate: sonogram.SampleRate,
frameSize: sonogram.Configuration.WindowSize,
frameOverlap: sonogram.Configuration.WindowOverlap);
sampleRate: spectrogram.SampleRate,
frameSize: spectrogram.Configuration.WindowSize,
frameOverlap: spectrogram.Configuration.WindowOverlap);

//Find all bin peaks and place in peaks matrix
var peaks = new double[frameCount, binCount];

// tf = timeframes
// tf = timeframe and bin = frequency bin.
var peaksMatrix = new double[frameCount, binCount];
for (int tf = 0; tf < frameCount; tf++)
{
for (int bin = minSearchBin; bin <= maxSearchBin; bin++)
{
if (sonogramData[tf, bin] < decibelThreshold)
//skip spectrogram cells below threshold
if (spectroData[tf, bin] < decibelThreshold)
{
continue;
}

// here we define the amplitude profile of a whistle.
// The buffer zone around centre of whistle is five bins wide. Ignore bins -2 and +2
var bandIntensity = ((sonogramData[tf, bin - 1] * 0.5) + sonogramData[tf, bin] + (sonogramData[tf, bin + 1] * 0.5)) / 2.0;
var topSidebandIntensity = (sonogramData[tf, bin + 3] + sonogramData[tf, bin + 4] + sonogramData[tf, bin + 5]) / 3.0;
// Here we define the amplitude profile of a whistle. The profile is 11 bins wide.
// The whistle occupies the centre three bins, i.e. bins -1, 0, +1. Bins -2 and +2 are ignored.
// Top and bottom sidebands, each three bins wide, are used to establish a baseline intensity.
// The bottom sideband = bins -3, -4, -5. The top sideband = bins +3, +4, +5.
// For more detail see the class summary.
var bandIntensity = ((spectroData[tf, bin - 1] * 0.5) + spectroData[tf, bin] + (spectroData[tf, bin + 1] * 0.5)) / 2.0;
var topSidebandIntensity = (spectroData[tf, bin + 3] + spectroData[tf, bin + 4] + spectroData[tf, bin + 5]) / 3.0;
var netAmplitude = 0.0;
if (bin < 5)
{
// if bin < 5, i.e. too close to the bottom bin of the spectrogram, then only subtract intensity of the top sideband.
// see class summary above.
netAmplitude = bandIntensity - topSidebandIntensity;
}
else
{
var bottomSideBandIntensity = (sonogramData[tf, bin - 3] + sonogramData[tf, bin - 4] + sonogramData[tf, bin - 5]) / 3.0;
var bottomSideBandIntensity = (spectroData[tf, bin - 3] + spectroData[tf, bin - 4] + spectroData[tf, bin - 5]) / 3.0;
netAmplitude = bandIntensity - ((topSidebandIntensity + bottomSideBandIntensity) / 2.0);
}

if (netAmplitude >= decibelThreshold)
{
peaks[tf, bin] = sonogramData[tf, bin];
peaksMatrix[tf, bin] = spectroData[tf, bin];
}
}
}

var tracks = GetOnebinTracks(peaks, minDuration, maxDuration, decibelThreshold, converter);
var tracks = GetOnebinTracks(peaksMatrix, minDuration, maxDuration, decibelThreshold, converter);

// Initialise tracks as events and get the combined intensity array.
var events = new List<WhistleEvent>();
var combinedIntensityArray = new double[frameCount];
var scoreRange = new Interval<double>(0, decibelThreshold * 5);
int scalingFactor = 5; // used to make plot easier to interpret.
var scoreRange = new Interval<double>(0, decibelThreshold * scalingFactor);

foreach (var track in tracks)
{
Expand Down Expand Up @@ -167,7 +198,9 @@ public static (List<EventCommon> ListOfevents, double[] CombinedIntensityArray)
}

// This algorithm tends to produce temporally overlapped whistle events in adjacent channels.
// Combine overlapping whistle events
// This is because a typical bird whistle is not exactly horizontal.
// Combine overlapping whistle events if they are within four frequency bins of each other.
// The value 4 is somewhat arbitrary but is consistent with the whistle profile described in the class comments above.
var hertzDifference = 4 * binWidth;
var whistleEvents = WhistleEvent.CombineAdjacentWhistleEvents(events, hertzDifference);

Expand All @@ -191,8 +224,8 @@ public static List<Track> GetOnebinTracks(double[,] peaks, double minDuration, d
var tracks = new List<Track>();

// Look for possible track starts and initialise as track.
// Cannot include edge rows & columns because of edge effects.
// Each row is a time frame which is a spectrum. Each column is a frequency bin
// Cannot include the three edge columns/frequency bins because of edge effects when determining a valid peak.
for (int row = 0; row < frameCount; row++)
{
for (int col = 3; col < bandwidthBinCount - 3; col++)
Expand Down
10 changes: 0 additions & 10 deletions src/AudioAnalysisTools/Tracks/OnebinTrackParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,6 @@ namespace AnalysisPrograms.Recognizers.Base
[YamlTypeTag(typeof(OnebinTrackParameters))]
public class OnebinTrackParameters : CommonParameters
{
/// <summary>
/// Gets or sets a value indicating the minimum Hertz value of the search band.
/// </summary>
public int? SearchbandMinHertz { get; set; }

/// <summary>
/// Gets or sets a value indicating the maximum Hertz value of the search band.
/// </summary>
public int? SearchbandMaxHertz { get; set; }

/// <summary>
/// Gets or sets a value indicating whether proximal whistle tracks are to be combined.
/// Proximal means the whistle tracks are in the same frequency band
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ public void TestWhistleAlgorithm()
FrameStep = 512,
WindowFunction = WindowFunctions.HANNING,
BgNoiseThreshold = 0.0,
SearchbandMinHertz = 0, //340,
SearchbandMaxHertz = 1000, //560,
MinHertz = 0, //340,
MaxHertz = 1000, //560,
MinDuration = 4,
MaxDuration = 6,
SpeciesName = "NoName",
Expand All @@ -244,8 +244,8 @@ public void TestWhistleAlgorithm()
FrameStep = 512,
WindowFunction = WindowFunctions.HANNING,
BgNoiseThreshold = 0.0,
SearchbandMinHertz = 0,
SearchbandMaxHertz = 1000,
MinHertz = 0,
MaxHertz = 1000,
MinDuration = 4,
MaxDuration = 6,
SpeciesName = "NoName",
Expand All @@ -260,8 +260,8 @@ public void TestWhistleAlgorithm()
FrameStep = 512,
WindowFunction = WindowFunctions.HANNING,
BgNoiseThreshold = 0.0,
SearchbandMinHertz = 301,
SearchbandMaxHertz = 517,
MinHertz = 301,
MaxHertz = 517,
MinDuration = 4,
MaxDuration = 6,
SpeciesName = "NoName",
Expand All @@ -276,8 +276,8 @@ public void TestWhistleAlgorithm()
FrameStep = 512,
WindowFunction = WindowFunctions.HANNING,
BgNoiseThreshold = 0.0,
SearchbandMinHertz = 100,
SearchbandMaxHertz = 700,
MinHertz = 100,
MaxHertz = 700,
MinDuration = 4,
MaxDuration = 6,
SpeciesName = "NoName",
Expand Down Expand Up @@ -465,8 +465,8 @@ public void TestOnebinTrackAlgorithm()

var parameters = new OnebinTrackParameters()
{
SearchbandMinHertz = 500,
SearchbandMaxHertz = 6000,
MinHertz = 500,
MaxHertz = 6000,
MinDuration = 0.2,
MaxDuration = 1.1,
CombinePossibleSyllableSequence = false,
Expand Down

0 comments on commit 42552f8

Please sign in to comment.