-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ENH: Add Audacity extended label track format, fix #213
- Add src/crowsetta/formats/bbox/audbbox.py
- TST: Add tests/data_for_tests/aud-bbox/ with files
- TST: Add audbbox fixtures in tests
- TST: Add tests/test_formats/test_bbox/test_audbbox.py
- Add src/crowsetta/data/audbbox/ with example data
- Loading branch information
1 parent
38c7a68
commit 0c3c0b6
Showing
10 changed files
with
765 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
scikit-maad developers. (2018-present). | ||
Example data file. | ||
Adapted under BSD License: https://github.com/scikit-maad/scikit-maad/blob/production/LICENSE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
0.101385 0.367520 SP | ||
\ 6441.064453 12296.577148 | ||
0.506924 3.041545 CRER | ||
\ 2593.156006 8866.920898 | ||
1.203945 1.482753 SP | ||
\ 6608.365234 11543.726562 | ||
2.724718 3.218969 SP | ||
\ 4851.710938 12631.178711 | ||
5.221319 7.869998 CRER | ||
\ 2509.505615 8699.620117 | ||
6.260514 6.666053 SP | ||
\ 5939.163574 12798.478516 | ||
7.946037 8.288210 SP | ||
\ 4600.760254 13049.430664 | ||
8.896519 9.302059 SP | ||
\ 5827.929351 12513.294481 | ||
9.973733 10.353927 SP | ||
\ 4851.710938 13969.581055 | ||
11.329756 13.750319 CRER | ||
\ 2091.254639 9117.871094 | ||
11.773314 12.077469 SP | ||
\ 6106.464355 12296.577148 | ||
12.660432 12.939240 SP | ||
\ 5604.562500 12296.577148 | ||
15.435842 15.879400 SP | ||
\ 4015.209229 13216.730469 | ||
16.170882 16.563748 SP | ||
\ 4098.859375 12463.878906 | ||
16.437017 18.730849 CRER | ||
\ 2676.806152 8699.620117 | ||
17.159384 17.514231 SP | ||
\ 5688.213379 12714.829102 | ||
18.198578 18.502733 SP | ||
\ 5353.612305 12463.878906 | ||
19.073023 19.465889 SP | ||
\ 4349.810059 12296.577148 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
from .audbbox import AudBBox | ||
from .raven import Raven | ||
|
||
# Names re-exported by this package: the format classes imported above.
__all__ = [
    "AudBBox",
    "Raven",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,245 @@ | ||
"""Module for Audacity label tracks | ||
in extended format, exported to .txt files | ||
https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Extended_format_with_frequency_ranges | ||
""" | ||
import pathlib | ||
from typing import ClassVar, List, Optional | ||
|
||
import attr | ||
import pandas as pd | ||
import pandera | ||
from pandera.typing import Series | ||
|
||
import crowsetta | ||
from crowsetta.typing import PathLike | ||
|
||
|
||
def txt_to_records(aud_txt_path: PathLike) -> list[dict]:
    """Load a txt file in Audacity extended label track format
    into records for a ``pandas.DataFrame``.

    Returns a ``list`` of ``dict`` that can be made into a
    ``DataFrame`` by calling ``pandas.DataFrame.from_records``.

    Each label occupies two consecutive tab-separated lines in the file:
    the first holds begin time, end time, and label; the second holds
    a leading marker field (a backslash in the example data), then
    the low and high frequency.

    Parameters
    ----------
    aud_txt_path : str, pathlib.Path
        Path to the .txt file to load.

    Returns
    -------
    records : list
        Of ``dict``, each ``dict`` a row
        in the ``DataFrame``, with keys
        ``begin_time_s``, ``end_time_s``, ``label``,
        ``low_freq_hz``, and ``high_freq_hz``.

    Raises
    ------
    ValueError
        If the file does not contain an even number of non-blank lines,
        or if a time or frequency field cannot be converted to ``float``.
    IndexError
        If a line has fewer tab-separated fields than expected.

    Notes
    -----
    We work with Audacity txt files this way, instead of
    loading with ``pandas.read_csv`` then munging, so that we can
    be sure that we can round-trip data without corrupting it.
    """
    with pathlib.Path(aud_txt_path).open('r') as fp:
        raw_lines = fp.read().splitlines()

    # Blank lines (e.g. a trailing empty line left by an editor) carry no
    # label data; skip them instead of failing on ``float('')`` below.
    lines = [line.split('\t') for line in raw_lines if line.strip()]

    # Every label needs both of its lines; fail early with a clear message
    # rather than silently dropping or mis-pairing a dangling line.
    if len(lines) % 2 != 0:
        raise ValueError(
            f"Expected an even number of non-blank lines (two per label) "
            f"in Audacity extended label track file, but found {len(lines)}: "
            f"{aud_txt_path}"
        )

    records = []
    # iterate over lines in groups of 2: even-indexed lines hold times and
    # label, odd-indexed lines hold the frequency range
    for row1, row2 in zip(lines[0::2], lines[1::2]):
        record = {
            "begin_time_s": float(row1[0]),
            "end_time_s": float(row1[1]),
            "label": str(row1[2]),
            # row2[0] is the leading marker field, not data
            "low_freq_hz": float(row2[1]),
            "high_freq_hz": float(row2[2]),
        }
        records.append(record)
    return records
|
||
|
||
def df_to_lines(df: pd.DataFrame) -> list[str]:
    """Serialize a dataframe of bounding-box labels into the text lines
    of an Audacity extended label track file.

    This function is (roughly) the inverse of
    ``crowsetta.formats.bbox.audbbox.txt_to_records``.

    Parameters
    ----------
    df : pandas.DataFrame
        Labels to write. It is validated against ``AudBBoxSchema``
        before any line is produced.

    Returns
    -------
    lines : list
        Of ``str``, two per row of ``df``, each ending in a newline:
        first the tab-separated begin time, end time, and label;
        then a backslash followed by the tab-separated low and
        high frequency. Can be saved to a text file
        by calling ``writelines``.

    Notes
    -----
    Lines are built by hand, rather than by munging and then calling
    ``pandas.DataFrame.to_csv``, so that data can be round-tripped
    without being corrupted.
    """
    validated = AudBBoxSchema.validate(df)

    # Each row contributes a (times + label) line followed by a
    # (frequency range) line; flatten the pairs into one list.
    return [
        text
        for row in validated.itertuples()
        for text in (
            f"{float(row.begin_time_s)}\t{float(row.end_time_s)}\t{row.label}\n",
            f"\\\t{float(row.low_freq_hz)}\t{float(row.high_freq_hz)}\n",
        )
    ]
|
||
|
||
class AudBBoxSchema(pandera.SchemaModel):
    """A ``pandera.SchemaModel`` that validates ``pandas`` dataframes
    loaded from Audacity label tracks
    in extended format, exported to .txt files
    https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Extended_format_with_frequency_ranges

    The schema declares five columns, each with ``coerce=True``:
    ``begin_time_s``, ``end_time_s``, ``label``,
    ``low_freq_hz``, and ``high_freq_hz``.
    """

    # Time columns; the ``_s`` suffix suggests seconds.
    begin_time_s: Series[float] = pandera.Field(coerce=True)
    end_time_s: Series[float] = pandera.Field(coerce=True)
    # Text label, declared with the pandas string extension dtype.
    label: Series[pd.StringDtype] = pandera.Field(coerce=True)
    # Frequency columns; the ``_hz`` suffix suggests Hertz.
    low_freq_hz: Series[float] = pandera.Field(coerce=True)
    high_freq_hz: Series[float] = pandera.Field(coerce=True)

    class Config:
        # Per the pandera docs: ``ordered`` checks that columns appear in
        # the order declared above, so the field order here matters.
        ordered = True
        # Per the pandera docs: ``strict`` rejects dataframes with columns
        # other than the ones declared above.
        strict = True
|
||
|
||
@crowsetta.interface.BBoxLike.register
@attr.define
class AudBBox:
    """Annotations from an Audacity label track
    in extended format, exported to a .txt file
    https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Extended_format_with_frequency_ranges

    Attributes
    ----------
    name: str
        Shorthand name for annotation format: 'aud-bbox'.
    ext: str
        Extension of files in annotation format: '.txt'
    df : pandas.DataFrame
        with annotations loaded into it
    annot_path : str, pathlib.Path
        Path to Audacity .txt file from which annotations were loaded.
    audio_path : str, pathlib.Path
        Path to audio file that the Audacity .txt file annotates.
        Optional, defaults to None.
    """
    # Maps column position to column name.
    COLUMNS_MAP: ClassVar[dict] = dict(
        enumerate(
            (
                "begin_time_s",
                "end_time_s",
                "label",
                "low_freq_hz",
                "high_freq_hz",
            )
        )
    )

    name: ClassVar[str] = 'aud-bbox'
    ext: ClassVar[str] = '.txt'

    df: pd.DataFrame
    annot_path: pathlib.Path
    audio_path: Optional[pathlib.Path] = attr.field(
        default=None,
        converter=attr.converters.optional(pathlib.Path),
    )

    @classmethod
    def from_file(cls,
                  annot_path: PathLike,
                  audio_path: Optional[PathLike] = None) -> 'Self':  # noqa: F821
        """Load annotations from an Audacity label track
        exported to a .txt file in extended format.

        Parameters
        ----------
        annot_path : str, pathlib.Path
            Path to a .txt file exported from Audacity.
            Its extension is validated against ``ext``.
        audio_path : str, pathlib.Path
            Path to audio file that the .txt file annotates.
            Optional, defaults to None.

        Returns
        -------
        audbbox : AudBBox
            With ``df`` holding the parsed and schema-validated labels.

        Examples
        --------
        >>> example = crowsetta.data.get('aud-bbox')
        >>> audbbox = crowsetta.formats.bbox.AudBBox.from_file(example.annot_path)
        """
        source = pathlib.Path(annot_path)
        crowsetta.validation.validate_ext(source, extension=cls.ext)

        # Parse the file into records, then build and validate the dataframe.
        rows = crowsetta.formats.bbox.audbbox.txt_to_records(source)
        table = pd.DataFrame.from_records(rows)
        table = crowsetta.formats.bbox.audbbox.AudBBoxSchema.validate(table)

        return cls(df=table, annot_path=source, audio_path=audio_path)

    def to_bbox(self) -> List[crowsetta.BBox]:
        """Convert the rows of ``df`` to a ``list`` of ``crowsetta.BBox``.

        Returns
        -------
        bboxes : list
            of ``crowsetta.BBox``, one per row, in row order

        Examples
        --------
        >>> example = crowsetta.data.get('aud-bbox')
        >>> audbbox = crowsetta.formats.bbox.AudBBox.from_file(example.annot_path)
        >>> bboxes = audbbox.to_bbox()
        """
        columns = (
            self.df.begin_time_s.values,
            self.df.end_time_s.values,
            self.df.label.values,
            self.df.low_freq_hz.values,
            self.df.high_freq_hz.values,
        )
        # Walk the five columns in lockstep, one bounding box per row.
        return [
            crowsetta.BBox(onset=onset,
                           offset=offset,
                           low_freq=freq_lo,
                           high_freq=freq_hi,
                           label=text)
            for onset, offset, text, freq_lo, freq_hi in zip(*columns)
        ]

    def to_annot(self) -> crowsetta.Annotation:
        """Convert this annotation to a ``crowsetta.Annotation``
        whose bounding boxes come from ``to_bbox``.

        Returns
        -------
        annot : crowsetta.Annotation

        Examples
        --------
        >>> example = crowsetta.data.get('aud-bbox')
        >>> audacitybbox = crowsetta.formats.bbox.AudBBox.from_file(example.annot_path)
        >>> annot = audacitybbox.to_annot()
        """
        boxes = self.to_bbox()
        annot = crowsetta.Annotation(
            annot_path=self.annot_path,
            notated_path=self.audio_path,
            bboxes=boxes,
        )
        return annot

    def to_file(self,
                annot_path: PathLike) -> None:
        """Write this annotation to a .txt file
        in extended label track format that can be read by Audacity.

        Parameters
        ----------
        annot_path : str, pathlib.Path
            Path including filename where file should be saved.
            Must have extension '.txt'
        """
        crowsetta.validation.validate_ext(annot_path, extension=self.ext)
        # Build every line before opening the file, so that a dataframe
        # that fails validation never truncates an existing file.
        text_lines = df_to_lines(self.df)
        destination = pathlib.Path(annot_path)
        with destination.open('w') as out_file:
            out_file.writelines(text_lines)
Oops, something went wrong.