diff --git a/src/AudioAnalysisTools/CommonParameters.cs b/src/AudioAnalysisTools/CommonParameters.cs index da58093fc..801d3c5e6 100644 --- a/src/AudioAnalysisTools/CommonParameters.cs +++ b/src/AudioAnalysisTools/CommonParameters.cs @@ -101,8 +101,8 @@ public abstract class CommonParameters : IValidatableObject public virtual IEnumerable Validate(ValidationContext validationContext) { - //yield return this.MinHertz.ValidateNotNull(nameof(this.MinHertz)); - //yield return this.MaxHertz.ValidateNotNull(nameof(this.MaxHertz)); + yield return this.MinHertz.ValidateNotNull(nameof(this.MinHertz)); + yield return this.MaxHertz.ValidateNotNull(nameof(this.MaxHertz)); yield return this.ValidateLessThan(this.MinHertz, nameof(this.MinHertz), this.MaxHertz, nameof(this.MaxHertz)); yield return this.DecibelThresholds.ValidateNotNull(nameof(this.DecibelThresholds)); yield return this.DecibelThresholds.ValidateNotEmpty(nameof(this.DecibelThresholds)); diff --git a/src/AudioAnalysisTools/Tracks/MinAndMaxBandwidthParameters.cs b/src/AudioAnalysisTools/Tracks/MinAndMaxBandwidthParameters.cs index a50c72178..d484b0741 100644 --- a/src/AudioAnalysisTools/Tracks/MinAndMaxBandwidthParameters.cs +++ b/src/AudioAnalysisTools/Tracks/MinAndMaxBandwidthParameters.cs @@ -10,24 +10,6 @@ namespace AnalysisPrograms.Recognizers.Base public class MinAndMaxBandwidthParameters : CommonParameters { - /* - /// - /// Gets or sets the bottom bound of a search band. Units are Hertz. - /// A search band is the frequency band within which an algorithm searches for a particular track or event. - /// This is to be carefully distinguished from the top and bottom bounds of a specific event. - /// A search band consists of two parallel lines/freqeuncy bins. - /// An event is represented by a rectangle. - /// Events will/should always lie within a search band. There may be exception in edge cases, i.e. where an event sits on a search bound. - /// - public int? 
SearchbandMinHertz { get; set; } - - /// - /// Gets or sets the the top bound of a search band. Units are Hertz. - /// A search band is the frequency band within which an algorithm searches for a particular track or event. - /// - public int? SearchbandMaxHertz { get; set; } - */ - /// /// Gets or sets the minimum allowed bandwidth of a spectrogram track or event, units = Hertz. /// diff --git a/src/AudioAnalysisTools/Tracks/OnebinTrackAlgorithm.cs b/src/AudioAnalysisTools/Tracks/OnebinTrackAlgorithm.cs index 1eb53354e..2996586ed 100644 --- a/src/AudioAnalysisTools/Tracks/OnebinTrackAlgorithm.cs +++ b/src/AudioAnalysisTools/Tracks/OnebinTrackAlgorithm.cs @@ -17,6 +17,26 @@ namespace AudioAnalysisTools.Tracks using TowseyLibrary; using TrackType = AudioAnalysisTools.Events.Tracks.TrackType; + /// + /// This class searches a spectrogram for whistles, that is, for tones or spectral peaks that persist in one frequency bin. + /// In practice, the whistles of birds and other natural sources do not occupy a single frequency bin, + /// although this statement is confounded by the choice of recording sample rate and frame size. + /// But typically, a bird whistle spreads itself across three or more frequency bins using typical values for SR etc. + /// In this class, we make an assumption about the spectral profile of a whistle and the user is expected to find the appropriate + /// sample rate, frame size and frame step such that the target whistle is detected using the profile. + /// We define a whistle profile that is 11 bins wide. The actual whistle occupies the centre three bins, i.e. bins -1, 0, +1. + /// Bins -2 and +2 are ignored to allow for some flexibility in getting the right combination of sample rate, frame size and frame step. + /// To establish that the centre three bins contain a spectral peak (i.e. are part of a potential whistle), + /// we define top and bottom sidebands, each of width three bins. 
+ /// These are used to establish a baseline intensity which must be less than that of the centre three bins. + /// The bottom sideband = bins -3, -4, -5. The top sideband = bins +3, +4, +5. + /// Defining a whistle this way introduces edge effects at the top and bottom of the spectrogram. + /// In case of the low frequency edge, in order to get as close as possible to the frequency bin zero, we do not incorporate a bottom sideband into the calculations. + /// Also note that a typical bird whistle is not exactly a pure tone. It typically fluctuates slightly from one frequency bin to an adjacent bin and back. + /// Consequently a final step in this whistle detection algorithm is to merge adjacent whistle tracks. + /// The algorithm is not perfect but it does detect constant tone sounds. This algorithm is designed so as not to pick up chirps, + /// i.e. gradually rising and falling tones. However, here again the right choice of SR, frame size and frame step is important. + /// public static class OnebinTrackAlgorithm { private static readonly ILog Log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType); @@ -54,36 +74,41 @@ public static (List Events, List DecibelPlots) GetOnebinTrack } /// - /// This method returns whistle (spectral peak) tracks enclosed in spectral events. + /// This method returns whistle (spectral peak) tracks enclosed as spectral events. /// It averages dB log values incorrectly but it is faster than doing many log conversions. /// - /// The spectrogram to be searched. + /// The spectrogram to be searched. + /// The parameters that determine the search. + /// Enables assignment of a start time (relative to recording) to any valid event. + /// The threshold for detection of a track. /// A list of acoustic events containing whistle tracks. 
- public static (List ListOfevents, double[] CombinedIntensityArray) GetOnebinTracks( - SpectrogramStandard sonogram, + SpectrogramStandard spectrogram, OnebinTrackParameters parameters, TimeSpan segmentStartOffset, double decibelThreshold) { - var sonogramData = sonogram.Data; - int frameCount = sonogramData.GetLength(0); - int binCount = sonogramData.GetLength(1); - int nyquist = sonogram.NyquistFrequency; + var spectroData = spectrogram.Data; + int frameCount = spectroData.GetLength(0); + int binCount = spectroData.GetLength(1); + int nyquist = spectrogram.NyquistFrequency; double binWidth = nyquist / (double)binCount; - // set lower frequency bins of the search band - int minSearchBin = (int)Math.Floor(parameters.SearchbandMinHertz.Value / binWidth); + // calculate the frequency bin for bottom of search band + // Allow for whistle sideband = one bin + int minSearchBin = (int)Math.Floor(parameters.MinHertz.Value / binWidth); if (minSearchBin < 1) { minSearchBin = 1; } - // set top search bin allowing for the top sideband. - int maxSearchBin = (int)Math.Floor(parameters.SearchbandMaxHertz.Value / binWidth) - 1; - if (maxSearchBin > binCount - 6) + // calculate the frequency bin for top of search band, allowing for the top sideband. + // see class summary above. + int topSideband = 6; + int maxSearchBin = (int)Math.Floor(parameters.MaxHertz.Value / binWidth) - 1; + if (maxSearchBin > binCount - topSideband) { - maxSearchBin = binCount - 6; + maxSearchBin = binCount - topSideband; } // get max and min duration for the whistle event. 
@@ -92,51 +117,57 @@ public static (List ListOfevents, double[] CombinedIntensityArray) var converter = new UnitConverters( segmentStartOffset: segmentStartOffset.TotalSeconds, - sampleRate: sonogram.SampleRate, - frameSize: sonogram.Configuration.WindowSize, - frameOverlap: sonogram.Configuration.WindowOverlap); + sampleRate: spectrogram.SampleRate, + frameSize: spectrogram.Configuration.WindowSize, + frameOverlap: spectrogram.Configuration.WindowOverlap); //Find all bin peaks and place in peaks matrix - var peaks = new double[frameCount, binCount]; - - // tf = timeframes + // tf = timeframe and bin = frequency bin. + var peaksMatrix = new double[frameCount, binCount]; for (int tf = 0; tf < frameCount; tf++) { for (int bin = minSearchBin; bin <= maxSearchBin; bin++) { - if (sonogramData[tf, bin] < decibelThreshold) + //skip spectrogram cells below threshold + if (spectroData[tf, bin] < decibelThreshold) { continue; } - // here we define the amplitude profile of a whistle. - // The buffer zone around centre of whistle is five bins wide. Ignore bins -2 and +2 - var bandIntensity = ((sonogramData[tf, bin - 1] * 0.5) + sonogramData[tf, bin] + (sonogramData[tf, bin + 1] * 0.5)) / 2.0; - var topSidebandIntensity = (sonogramData[tf, bin + 3] + sonogramData[tf, bin + 4] + sonogramData[tf, bin + 5]) / 3.0; + // Here we define the amplitude profile of a whistle. The profile is 11 bins wide. + // The whistle occupies the centre three bins, i.e. bins -1, 0, +1. Bins -2 and +2 are ignored. + // Top and bottom sidebands, each of width three bins, are used to establish a baseline intensity. + // The bottom sideband = bins -3, -4, -5. The top sideband = bins +3, +4, +5. + // For more detail see the class summary. 
+ var bandIntensity = ((spectroData[tf, bin - 1] * 0.5) + spectroData[tf, bin] + (spectroData[tf, bin + 1] * 0.5)) / 2.0; + var topSidebandIntensity = (spectroData[tf, bin + 3] + spectroData[tf, bin + 4] + spectroData[tf, bin + 5]) / 3.0; var netAmplitude = 0.0; if (bin < 5) { + // if bin < 5, i.e. too close to the bottom bin of the spectrogram, then only subtract intensity of the top sideband. + // see class summary above. netAmplitude = bandIntensity - topSidebandIntensity; } else { - var bottomSideBandIntensity = (sonogramData[tf, bin - 3] + sonogramData[tf, bin - 4] + sonogramData[tf, bin - 5]) / 3.0; + var bottomSideBandIntensity = (spectroData[tf, bin - 3] + spectroData[tf, bin - 4] + spectroData[tf, bin - 5]) / 3.0; netAmplitude = bandIntensity - ((topSidebandIntensity + bottomSideBandIntensity) / 2.0); } if (netAmplitude >= decibelThreshold) { - peaks[tf, bin] = sonogramData[tf, bin]; + peaksMatrix[tf, bin] = spectroData[tf, bin]; } } } - var tracks = GetOnebinTracks(peaks, minDuration, maxDuration, decibelThreshold, converter); + var tracks = GetOnebinTracks(peaksMatrix, minDuration, maxDuration, decibelThreshold, converter); // Initialise tracks as events and get the combined intensity array. var events = new List(); var combinedIntensityArray = new double[frameCount]; - var scoreRange = new Interval(0, decibelThreshold * 5); + int scalingFactor = 5; // used to make plot easier to interpret. + var scoreRange = new Interval(0, decibelThreshold * scalingFactor); foreach (var track in tracks) { @@ -167,7 +198,9 @@ public static (List ListOfevents, double[] CombinedIntensityArray) } // This algorithm tends to produce temporally overlapped whistle events in adjacent channels. - // Combine overlapping whistle events + // This is because a typical bird whistle is not exactly horizontal. + // Combine overlapping whistle events if they are within four frequency bins of each other. 
+ // The value 4 is somewhat arbitrary but is consistent with the whistle profile described in the class comments above. var hertzDifference = 4 * binWidth; var whistleEvents = WhistleEvent.CombineAdjacentWhistleEvents(events, hertzDifference); @@ -191,8 +224,8 @@ public static List GetOnebinTracks(double[,] peaks, double minDuration, d var tracks = new List(); // Look for possible track starts and initialise as track. - // Cannot include edge rows & columns because of edge effects. // Each row is a time frame which is a spectrum. Each column is a frequency bin + // Cannot include the three edge columns/frequency bins because of edge effects when determining a valid peak. for (int row = 0; row < frameCount; row++) { for (int col = 3; col < bandwidthBinCount - 3; col++) diff --git a/src/AudioAnalysisTools/Tracks/OnebinTrackParameters.cs b/src/AudioAnalysisTools/Tracks/OnebinTrackParameters.cs index 8df168eea..97e02b536 100644 --- a/src/AudioAnalysisTools/Tracks/OnebinTrackParameters.cs +++ b/src/AudioAnalysisTools/Tracks/OnebinTrackParameters.cs @@ -20,16 +20,6 @@ namespace AnalysisPrograms.Recognizers.Base [YamlTypeTag(typeof(OnebinTrackParameters))] public class OnebinTrackParameters : CommonParameters { - /// - /// Gets or sets a value indicating the minimum Hertz value of the search band. - /// - public int? SearchbandMinHertz { get; set; } - - /// - /// Gets or sets a value indicating the maximum Hertz value of the search band. - /// - public int? SearchbandMaxHertz { get; set; } - /// /// Gets or sets a value indicating whether proximal whistle tracks are to be combined. 
/// Proximal means the whistle tracks are in the same frequency band diff --git a/tests/Acoustics.Test/AnalysisPrograms/Recognizers/GenericRecognizerTests.cs b/tests/Acoustics.Test/AnalysisPrograms/Recognizers/GenericRecognizerTests.cs index dbc31b6b6..37e3ee6e7 100644 --- a/tests/Acoustics.Test/AnalysisPrograms/Recognizers/GenericRecognizerTests.cs +++ b/tests/Acoustics.Test/AnalysisPrograms/Recognizers/GenericRecognizerTests.cs @@ -228,8 +228,8 @@ public void TestWhistleAlgorithm() FrameStep = 512, WindowFunction = WindowFunctions.HANNING, BgNoiseThreshold = 0.0, - SearchbandMinHertz = 0, //340, - SearchbandMaxHertz = 1000, //560, + MinHertz = 0, //340, + MaxHertz = 1000, //560, MinDuration = 4, MaxDuration = 6, SpeciesName = "NoName", @@ -244,8 +244,8 @@ public void TestWhistleAlgorithm() FrameStep = 512, WindowFunction = WindowFunctions.HANNING, BgNoiseThreshold = 0.0, - SearchbandMinHertz = 0, - SearchbandMaxHertz = 1000, + MinHertz = 0, + MaxHertz = 1000, MinDuration = 4, MaxDuration = 6, SpeciesName = "NoName", @@ -260,8 +260,8 @@ public void TestWhistleAlgorithm() FrameStep = 512, WindowFunction = WindowFunctions.HANNING, BgNoiseThreshold = 0.0, - SearchbandMinHertz = 301, - SearchbandMaxHertz = 517, + MinHertz = 301, + MaxHertz = 517, MinDuration = 4, MaxDuration = 6, SpeciesName = "NoName", @@ -276,8 +276,8 @@ public void TestWhistleAlgorithm() FrameStep = 512, WindowFunction = WindowFunctions.HANNING, BgNoiseThreshold = 0.0, - SearchbandMinHertz = 100, - SearchbandMaxHertz = 700, + MinHertz = 100, + MaxHertz = 700, MinDuration = 4, MaxDuration = 6, SpeciesName = "NoName", @@ -465,8 +465,8 @@ public void TestOnebinTrackAlgorithm() var parameters = new OnebinTrackParameters() { - SearchbandMinHertz = 500, - SearchbandMaxHertz = 6000, + MinHertz = 500, + MaxHertz = 6000, MinDuration = 0.2, MaxDuration = 1.1, CombinePossibleSyllableSequence = false,