Skip to content

Commit

Permalink
ENH: Add Audacity extended label track format, fix #213
Browse files Browse the repository at this point in the history
- Add src/crowsetta/formats/bbox/audbbox.py
- TST: Add tests/data_for_tests/aud-bbox/ with files
- TST: Add audbbox fixtures in tests
- TST: Add tests/test_formats/test_bbox/test_audbbox.py
- Add src/crowsetta/data/audbbox/ with example data
  • Loading branch information
NickleDave committed Mar 1, 2023
1 parent 38c7a68 commit 0c3c0b6
Show file tree
Hide file tree
Showing 10 changed files with 765 additions and 0 deletions.
Empty file.
3 changes: 3 additions & 0 deletions src/crowsetta/data/audbbox/citation.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
scikit-maad developers. (2018-present).
Example data file.
Adapted under BSD License: https://github.com/scikit-maad/scikit-maad/blob/production/LICENSE
36 changes: 36 additions & 0 deletions src/crowsetta/data/audbbox/spinetail.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
0.101385 0.367520 SP
\ 6441.064453 12296.577148
0.506924 3.041545 CRER
\ 2593.156006 8866.920898
1.203945 1.482753 SP
\ 6608.365234 11543.726562
2.724718 3.218969 SP
\ 4851.710938 12631.178711
5.221319 7.869998 CRER
\ 2509.505615 8699.620117
6.260514 6.666053 SP
\ 5939.163574 12798.478516
7.946037 8.288210 SP
\ 4600.760254 13049.430664
8.896519 9.302059 SP
\ 5827.929351 12513.294481
9.973733 10.353927 SP
\ 4851.710938 13969.581055
11.329756 13.750319 CRER
\ 2091.254639 9117.871094
11.773314 12.077469 SP
\ 6106.464355 12296.577148
12.660432 12.939240 SP
\ 5604.562500 12296.577148
15.435842 15.879400 SP
\ 4015.209229 13216.730469
16.170882 16.563748 SP
\ 4098.859375 12463.878906
16.437017 18.730849 CRER
\ 2676.806152 8699.620117
17.159384 17.514231 SP
\ 5688.213379 12714.829102
18.198578 18.502733 SP
\ 5353.612305 12463.878906
19.073023 19.465889 SP
\ 4349.810059 12296.577148
2 changes: 2 additions & 0 deletions src/crowsetta/formats/bbox/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from .audbbox import AudBBox
from .raven import Raven

# Names exported by ``from crowsetta.formats.bbox import *``:
# the bounding-box annotation format classes imported above.
__all__ = [
    "AudBBox",
    "Raven",
]
245 changes: 245 additions & 0 deletions src/crowsetta/formats/bbox/audbbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
"""Module for Audacity label tracks
in extended format, exported to .txt files
https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Extended_format_with_frequency_ranges
"""
import pathlib
from typing import ClassVar, List, Optional

import attr
import pandas as pd
import pandera
from pandera.typing import Series

import crowsetta
from crowsetta.typing import PathLike


def txt_to_records(aud_txt_path: PathLike) -> list[dict]:
    """Load a txt file in Audacity extended label track format
    into records for a ``pandas.DataFrame``.

    Returns a ``list`` of ``dict`` that can be made into a
    ``DataFrame`` by calling ``pandas.DataFrame.from_records``.

    Parameters
    ----------
    aud_txt_path : str, pathlib.Path
        Path to a .txt file in Audacity extended label track format.
        Each annotation spans two consecutive tab-separated lines:
        the first holds begin time, end time, and label;
        the second holds a leading backslash, low frequency,
        and high frequency.

    Returns
    -------
    records : list
        Of ``dict``, each ``dict`` a row
        in the ``DataFrame``, with keys
        ``"begin_time_s"``, ``"end_time_s"``, ``"label"``,
        ``"low_freq_hz"``, and ``"high_freq_hz"``.

    Raises
    ------
    ValueError
        If the file does not contain an even number of non-blank lines,
        or if a time or frequency field cannot be converted to ``float``.

    Notes
    -----
    We work with Audacity txt files this way, instead of
    loading with ``pandas.read_csv`` then munging, so that we can
    be sure that we can round-trip data without corrupting it.

    Blank lines (e.g., extra newlines at the end of the file)
    carry no annotation data and are skipped.
    """
    with pathlib.Path(aud_txt_path).open('r') as fp:
        lines = fp.read().splitlines()
    # drop blank / whitespace-only lines so that a stray trailing newline
    # does not break the pairing of rows below
    rows = [line.split('\t') for line in lines if line.strip()]

    if len(rows) % 2 != 0:
        raise ValueError(
            f"Expected an even number of non-blank lines in '{aud_txt_path}', "
            f"two per annotation, but found {len(rows)}. "
            "File may be truncated or not in Audacity extended label track format."
        )

    records = []
    # even-indexed rows hold times + label, odd-indexed rows hold frequencies
    for row1, row2 in zip(rows[0::2], rows[1::2]):
        record = {
            "begin_time_s": float(row1[0]),
            "end_time_s": float(row1[1]),
            "label": str(row1[2]),
            # row2[0] is the backslash that marks the frequency line
            "low_freq_hz": float(row2[1]),
            "high_freq_hz": float(row2[2]),
        }
        records.append(record)
    return records


def df_to_lines(df: pd.DataFrame) -> list[str]:
    """Turn a dataframe of Audacity extended-format annotations
    into the text lines of a label track file.

    This function is (roughly) the inverse of
    ``crowsetta.formats.bbox.audbbox.txt_to_records``.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotations from a .txt file in Audacity extended label track format,
        e.g. as built from the records returned by
        ``crowsetta.formats.bbox.audbbox.txt_to_records``.
        Validated against ``AudBBoxSchema`` before conversion.

    Returns
    -------
    lines : list
        Of strings, two per row of ``df``, each ending in a newline,
        that can be saved to a text file by calling ``writelines``.

    Notes
    -----
    The lines are formatted by hand, rather than by munging
    and then calling ``pandas.DataFrame.to_csv``,
    so that data can be round-tripped without corrupting it.
    """
    validated = AudBBoxSchema.validate(df)

    lines = []
    for row in validated.itertuples(index=False):
        begin, end = float(row.begin_time_s), float(row.end_time_s)
        low, high = float(row.low_freq_hz), float(row.high_freq_hz)
        # first line of the pair: times and label
        lines.append(f"{begin}\t{end}\t{row.label}\n")
        # second line of the pair: backslash marker, then frequency range
        lines.append(f"\\\t{low}\t{high}\n")

    return lines


class AudBBoxSchema(pandera.SchemaModel):
    """A ``pandera.SchemaModel`` that validates ``pandas`` dataframes
    loaded from Audacity label tracks
    in extended format, exported to .txt files
    https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Extended_format_with_frequency_ranges

    A dataframe passed to ``AudBBoxSchema.validate`` is expected to have
    one column per field declared below, one row per annotation.
    """

    # ``coerce=True`` asks pandera to cast each column
    # to the annotated dtype during validation
    begin_time_s: Series[float] = pandera.Field(coerce=True)
    end_time_s: Series[float] = pandera.Field(coerce=True)
    label: Series[pd.StringDtype] = pandera.Field(coerce=True)
    low_freq_hz: Series[float] = pandera.Field(coerce=True)
    high_freq_hz: Series[float] = pandera.Field(coerce=True)

    class Config:
        # pandera option: columns must appear in the order declared above
        ordered = True
        # pandera option: columns not declared above fail validation
        strict = True


@crowsetta.interface.BBoxLike.register
@attr.define
class AudBBox:
    """Class that represents Audacity label tracks
    in extended format, exported to .txt files
    https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Extended_format_with_frequency_ranges

    Attributes
    ----------
    name: str
        Shorthand name for annotation format: 'aud-bbox'.
    ext: str
        Extension of files in annotation format: '.txt'
    df : pandas.DataFrame
        with annotations loaded into it
    annot_path : str, pathlib.Path
        Path to Audacity .txt file from which annotations were loaded.
        Converted to ``pathlib.Path`` when an instance is created.
    audio_path : str, pathlib.Path
        Path to audio file that the Audacity .txt file annotates.
        Optional, defaults to None.
        Converted to ``pathlib.Path`` when not None.
    """
    # column position -> column name, in the same order
    # as the fields declared on ``AudBBoxSchema``
    COLUMNS_MAP: ClassVar[dict] = {
        0: "begin_time_s",
        1: "end_time_s",
        2: "label",
        3: "low_freq_hz",
        4: "high_freq_hz",
    }

    name: ClassVar[str] = 'aud-bbox'
    ext: ClassVar[str] = '.txt'

    df: pd.DataFrame
    # normalized the same way as ``audio_path`` so that the attribute
    # matches its annotation even when a ``str`` is passed to ``__init__``;
    # ``optional`` leaves an explicit None untouched
    annot_path: pathlib.Path = attr.field(converter=attr.converters.optional(pathlib.Path))
    audio_path: Optional[pathlib.Path] = attr.field(default=None,
                                                    converter=attr.converters.optional(pathlib.Path))

    @classmethod
    def from_file(cls,
                  annot_path: PathLike,
                  audio_path: Optional[PathLike] = None) -> 'Self':  # noqa: F821
        """Load annotations from an Audacity label track
        in extended format, exported to a .txt file.

        Parameters
        ----------
        annot_path : str, pathlib.Path
            Path to a .txt file exported from Audacity,
            in extended label track format.
        audio_path : str, pathlib.Path
            Path to audio file that the Audacity .txt file annotates.
            Optional, defaults to None.

        Returns
        -------
        audbbox : AudBBox
            With the annotations loaded into ``audbbox.df``,
            after validation with ``AudBBoxSchema``.

        Examples
        --------
        >>> example = crowsetta.data.get('aud-bbox')
        >>> audbbox = crowsetta.formats.bbox.AudBBox.from_file(example.annot_path)
        """
        annot_path = pathlib.Path(annot_path)
        crowsetta.validation.validate_ext(annot_path, extension=cls.ext)
        # parse by hand instead of ``pandas.read_csv``; see ``txt_to_records``
        records = txt_to_records(annot_path)
        df = pd.DataFrame.from_records(records)
        df = AudBBoxSchema.validate(df)

        return cls(
            df=df,
            annot_path=annot_path,
            audio_path=audio_path,
        )

    def to_bbox(self) -> List[crowsetta.BBox]:
        """Convert this Audacity extended label track annotation to a ``list`` of ``crowsetta.BBox``.

        Returns
        -------
        bboxes : list
            of ``crowsetta.BBox``, one per row of ``self.df``,
            in the same order as the rows.

        Examples
        --------
        >>> example = crowsetta.data.get('aud-bbox')
        >>> audbbox = crowsetta.formats.bbox.AudBBox.from_file(example.annot_path)
        >>> bboxes = audbbox.to_bbox()
        """
        return [
            crowsetta.BBox(onset=begin_time,
                           offset=end_time,
                           low_freq=low_freq,
                           high_freq=high_freq,
                           label=label)
            for begin_time, end_time, label, low_freq, high_freq in zip(
                self.df.begin_time_s.values,
                self.df.end_time_s.values,
                self.df.label.values,
                self.df.low_freq_hz.values,
                self.df.high_freq_hz.values,
            )
        ]

    def to_annot(self) -> crowsetta.Annotation:
        """Convert this Audacity bbox annotation to a ``crowsetta.Annotation``.

        Returns
        -------
        annot : crowsetta.Annotation
            With ``annot_path`` set to ``self.annot_path``,
            ``notated_path`` set to ``self.audio_path``,
            and ``bboxes`` set to the result of ``self.to_bbox()``.

        Examples
        --------
        >>> example = crowsetta.data.get('aud-bbox')
        >>> audacitybbox = crowsetta.formats.bbox.AudBBox.from_file(example.annot_path)
        >>> annot = audacitybbox.to_annot()
        """
        bboxes = self.to_bbox()
        return crowsetta.Annotation(annot_path=self.annot_path,
                                    notated_path=self.audio_path,
                                    bboxes=bboxes)

    def to_file(self,
                annot_path: PathLike) -> None:
        """Make a .txt file from this annotation
        in extended label track format that can be read by Audacity.

        Parameters
        ----------
        annot_path : str, pathlib.Path
            Path including filename where file should be saved.
            Must have extension '.txt'
        """
        crowsetta.validation.validate_ext(annot_path, extension=self.ext)
        # ``df_to_lines`` validates ``self.df`` and returns
        # newline-terminated strings, so they can be written as-is
        lines = df_to_lines(self.df)
        with pathlib.Path(annot_path).open('w') as fp:
            fp.writelines(lines)
Loading

0 comments on commit 0c3c0b6

Please sign in to comment.