From da6f385bd7808a51d750184dbf65d8df7b69f36a Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Mon, 20 Mar 2023 17:42:29 -0400 Subject: [PATCH] DOC/CLN: Fix cross-refs in docstrings, style. Fixes #239 Mostly fixing cross-refs in docstrings. Also makes some minor style/formatting fixes, e.g., ".txt file" -> "txt file", adds citations in a couple places. Add type hints in validation.py --- src/crowsetta/annotation.py | 17 ++-- src/crowsetta/formats/__init__.py | 18 +++- src/crowsetta/formats/bbox/audbbox.py | 58 ++++++------ src/crowsetta/formats/bbox/raven.py | 30 +++--- src/crowsetta/formats/seq/audseq.py | 47 +++++----- src/crowsetta/formats/seq/birdsongrec.py | 44 +++++---- src/crowsetta/formats/seq/generic.py | 97 ++++++++++---------- src/crowsetta/formats/seq/notmat.py | 17 ++-- src/crowsetta/formats/seq/simple.py | 49 +++++----- src/crowsetta/formats/seq/textgrid.py | 47 +++++----- src/crowsetta/formats/seq/timit.py | 46 +++++++--- src/crowsetta/formats/seq/yarden.py | 25 +++-- src/crowsetta/interface/base.py | 6 +- src/crowsetta/interface/bbox/base.py | 10 +- src/crowsetta/interface/seq/base.py | 4 +- src/crowsetta/segment.py | 4 +- src/crowsetta/sequence.py | 77 +++++++++------- src/crowsetta/transcriber.py | 112 ++++++++++++++++++----- src/crowsetta/validation.py | 27 ++++-- 19 files changed, 435 insertions(+), 300 deletions(-) diff --git a/src/crowsetta/annotation.py b/src/crowsetta/annotation.py index 3ec723f..b7ec101 100644 --- a/src/crowsetta/annotation.py +++ b/src/crowsetta/annotation.py @@ -18,27 +18,28 @@ class Annotation: Attributes ---------- annot_path : str, pathlib.Path - path to file from which annotations were loaded + Path to file from which annotations were loaded. notated_path : str, pathlib.Path - path to file that ``annot_path`` annotates. + Path to file that ``annot_path`` annotates. E.g., an audio file, or an array file that contains a spectrogram generated from audio. Optional, default is None. 
seq : crowsetta.Sequence - a sequence of annotated segments, + A sequence of annotated segments, each having an onset time, offset time, - and label. + and label. A :class:`crowsetta.Sequence` instance. bboxes : list - of ``crowsetta.BBox``, - annotated bounding boxes, + List of annotated bounding boxes, each having an onset time, offset time, lowest frequency, highest frequency, and label. + Each item in the list will be a + :class:`crowsetta.BBox` instance. Notes ----- - A ``crowsetta.Annotation`` can have a ``seq`` - or ``bboxes``, but not both. + A :class:`crowsetta.Annotation` can have either a ``seq`` + attribute or a ``bboxes`` attribute, but not both. Examples -------- diff --git a/src/crowsetta/formats/__init__.py b/src/crowsetta/formats/__init__.py index 3c7bc0d..9a66d8a 100644 --- a/src/crowsetta/formats/__init__.py +++ b/src/crowsetta/formats/__init__.py @@ -16,6 +16,13 @@ FORMATS[attr.name] = attr +__all__ = [ + 'bbox', + 'FORMATS', + 'seq' +] + + def by_name(name: str) -> Type: """Get an annotation class by its string name @@ -63,10 +70,10 @@ def as_list() -> list[str]: def register_format(format_class: Type) -> Type: """Decorator to register annotation formats. - Adds class to ``crowsetta.formats``. + Adds class to :mod:`crowsetta.formats`. The decorator maps the class variable ``name``, a string, to the class itself, so that calling - ``crowsetta.formats.by_name`` with that string + :func:`crowsetta.formats.by_name` with that string will return the class. Parameters @@ -74,15 +81,16 @@ def register_format(format_class: Type) -> Type: format_class : class A class that has the required class variables and adheres to one of the interfaces - defined in ``crowsetta.interface``, - either ``SeqLike`` or ``BBoxLike``. + defined in :mod:`crowsetta.interface`, + either :class:`~crowsetta.interface.seq.SeqLike` + or :class:`~crowsetta.interface.bbox.BBoxLike`. Returns ------- format_class : class The same class, unchanged. 
This decorator only adds the class - to ``crowsetta.formats.FORMATS``. + to :data:`crowsetta.formats.FORMATS`. """ if not issubclass(format_class, interface.seq.SeqLike) and not issubclass(format_class, interface.bbox.BBoxLike): raise TypeError(f"format class must be subclass of SeqLike or BBoxLike, but was not: {format_class}") diff --git a/src/crowsetta/formats/bbox/audbbox.py b/src/crowsetta/formats/bbox/audbbox.py index 45278c2..fe2aca0 100644 --- a/src/crowsetta/formats/bbox/audbbox.py +++ b/src/crowsetta/formats/bbox/audbbox.py @@ -1,5 +1,5 @@ """Module for Audacity label tracks -in extended format, exported to .txt files +in extended format, exported to txt files https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Extended_format_with_frequency_ranges """ from __future__ import annotations @@ -18,10 +18,10 @@ def txt_to_records(aud_txt_path: PathLike) -> list[dict]: """Load a txt file in Audacity extended label track format - into records for a `pandas.DataFrame``. + into records for a :class:`pandas.DataFrame`. - Returns a ``list`` of ``dict`` that can be made into a - ``DataFrame`` by calling ``pandas.DataFrame.from_records``. + Returns a :class:`list` of :class:`dict` that can be made into a + :class:`~pandas.DataFrame` by calling :meth:`pandas.DataFrame.from_records`. Parameters ---------- @@ -30,13 +30,13 @@ def txt_to_records(aud_txt_path: PathLike) -> list[dict]: Returns ------- records : list - Of ``dict``, each ``dict`` a row - in the ``DataFrame``. + Of :class:`dict`, each :class:`dict` will become + a row in the :class:`~pandas.DataFrame`. Notes ----- We work with Audacity txt files this way, instead of - loading with ``pandas.read_csv`` then munging, so that we can + loading with :func:`pandas.read_csv` then munging, so that we can be sure that we can round-trip data without corrupting it. 
""" with pathlib.Path(aud_txt_path).open("r") as fp: @@ -58,29 +58,30 @@ def txt_to_records(aud_txt_path: PathLike) -> list[dict]: def df_to_lines(df: pd.DataFrame) -> list[str]: - """Convert a pandas DataFrame to a list of strings - that can be saved as a txt file in Audacity extended + """Convert a :class:`pandas.DataFrame` to a + :class:`list` of :class:`str` that can be saved + as a txt file in Audacity extended label track format. This function is (roughly) the inverse of - ``crowsetta.formats.bbox.audbbox.txt_to_records``. + :func:`crowsetta.formats.bbox.audbbox.txt_to_records`. Parameters ---------- df : pandas.DataFrame - With contents of a .txt file in Audacity extended label track format, - after being loaded and parsed by ``crowsetta.formats.bbox.audbbox.audbbox_txt_to_df`` + With contents of a txt file in Audacity extended label track format, + after being loaded and parsed by :func:`crowsetta.formats.bbox.audbbox.audbbox_txt_to_df` Returns ------- lines : list List of strings that can be saved to a text file - by calling ``writelines``. + by calling :func:`writelines`. Notes ----- We work with Audacity txt files this way, instead of - munging and then calling ``pandas.DataFrame.to_csv``, + munging and then calling :meth:`pandas.DataFrame.to_csv`, so that we can be sure that we can round-trip data without corrupting it. 
""" @@ -96,9 +97,10 @@ def df_to_lines(df: pd.DataFrame) -> list[str]: class AudBBoxSchema(pandera.SchemaModel): - """A ``pandera.SchemaModel`` that validates ``pandas`` dataframes + """A :class:`pandera.SchemaModel` that + validates :mod:`pandas` dataframes loaded from Audacity label tracks - in extended format, exported to .txt files + in extended format, exported to txt files https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Extended_format_with_frequency_ranges """ @@ -117,7 +119,7 @@ class Config: @attr.define class AudBBox: """Class that represents Audacity label tracks - in extended format, exported to .txt files + in extended format, exported to txt files https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Extended_format_with_frequency_ranges Attributes @@ -129,9 +131,9 @@ class AudBBox: df : pandas.DataFrame with annotations loaded into it annot_path : str, pathlib.Path - Path to Audacity .txt file from which annotations were loaded. + Path to Audacity txt file from which annotations were loaded. audio_path : str. pathlib.Path - Path to audio file that the Audacity .txt file annotates. + Path to audio file that the Audacity txt file annotates. """ COLUMNS_MAP: ClassVar[dict] = { @@ -151,15 +153,15 @@ class AudBBox: @classmethod def from_file(cls, annot_path: PathLike, audio_path: Optional[PathLike] = None) -> "Self": # noqa: F821 - """Load annotations from a Audacity annotation file with bbox, + """Load annotations from an Audacity annotation file with bounding boxes, created by exporting a Selection Table. Parameters ---------- annot_path : str, pathlib.Path - Path to a .txt file exported from Audacity bbox. + Path to a txt file exported from Audacity bbox. audio_path : str, pathlib.Path - Path to audio file that the Audacity bbox .txt file annotates. + Path to audio file that the Audacity bbox txt file annotates. Optional, defaults to None. 
Examples @@ -172,7 +174,7 @@ def from_file(cls, annot_path: PathLike, audio_path: Optional[PathLike] = None) records = crowsetta.formats.bbox.audbbox.txt_to_records(annot_path) df = pd.DataFrame.from_records(records) if len(df) < 1: - raise ValueError(f"Cannot load annotations, " f"there are no rows in Audacity .txt file:\n{df}") + raise ValueError(f"Cannot load annotations, " f"there are no rows in Audacity txt file:\n{df}") df = crowsetta.formats.bbox.audbbox.AudBBoxSchema.validate(df) return cls( @@ -182,12 +184,13 @@ def from_file(cls, annot_path: PathLike, audio_path: Optional[PathLike] = None) ) def to_bbox(self) -> List[crowsetta.BBox]: - """Convert this Audacity extended label track annotation to a ``list`` of ``crowsetta.Bbox``. + """Convert this Audacity extended label track annotation + to a :class:`list` of :class:`crowsetta.BBox`. Returns ------- bboxes : list - of ``crowsetta.BBox`` + A :class:`list` of :class:`crowsetta.BBox` instances. Examples -------- @@ -209,7 +212,8 @@ def to_bbox(self) -> List[crowsetta.BBox]: return bboxes def to_annot(self) -> crowsetta.Annotation: - """Convert this Audacity bbox annotation to a ``crowsetta.Annotation``. + """Convert this Audacity bbox annotation + to a :class:`crowsetta.Annotation`. Returns ------- @@ -225,7 +229,7 @@ def to_annot(self) -> crowsetta.Annotation: return crowsetta.Annotation(annot_path=self.annot_path, notated_path=self.audio_path, bboxes=bboxes) def to_file(self, annot_path: PathLike) -> None: - """Make a .txt file from this annotation + """Make a txt file from this annotation in extended label track format that can be read by Audacity. 
Parameters diff --git a/src/crowsetta/formats/bbox/raven.py b/src/crowsetta/formats/bbox/raven.py index 162a219..3567d17 100644 --- a/src/crowsetta/formats/bbox/raven.py +++ b/src/crowsetta/formats/bbox/raven.py @@ -18,8 +18,8 @@ class RavenSchema(pandera.SchemaModel): - """A ``pandera.SchemaModel`` that validates ``pandas`` dataframes - loaded from a .txt file, created by exporting a Selection Table + """A :class:`pandera.SchemaModel` that validates :class:`pandas.DataFrame` instances + loaded from a txt file, created by exporting a Selection Table from Raven. """ @@ -40,7 +40,7 @@ class Config: @crowsetta.interface.BBoxLike.register @attr.define class Raven: - """Class that represents .txt annotation files + """Class that represents txt annotation files from Raven (https://ravensoundsoftware.com/software/), created by exporting a Selection Table. @@ -53,9 +53,9 @@ class Raven: df : pandas.DataFrame with annotations loaded into it annot_path : str, pathlib.Path - Path to Raven .txt file from which annotations were loaded. + Path to Raven txt file from which annotations were loaded. audio_path : str. pathlib.Path - Path to audio file that the Raven .txt file annotates. + Path to audio file that the Raven txt file annotates. """ name: ClassVar[str] = "raven" @@ -82,11 +82,11 @@ def from_file( Parameters ---------- annot_path : str, pathlib.Path - Path to a .txt file exported from Raven. + Path to a txt file exported from Raven. annot_col : str - name of column that contains annotations + Name of column that contains annotations. audio_path : str, pathlib.Path - Path to audio file that the Raven .txt file annotates. + Path to audio file that the Raven txt file annotates. Optional, defaults to None. 
Examples @@ -100,7 +100,7 @@ def from_file( # assume file is space-separated with no header df = pd.read_csv(annot_path, sep="\t") if len(df) < 1: - raise ValueError(f"Cannot load annotations, " f"there are no rows in Raven .txt file:\n{df}") + raise ValueError(f"Cannot load annotations, " f"there are no rows in Raven txt file:\n{df}") columns_map = dict(cls.COLUMNS_MAP) # copy columns_map.update({annot_col: "annotation"}) df.rename(columns=columns_map, inplace=True) @@ -114,12 +114,13 @@ def from_file( ) def to_bbox(self) -> List[crowsetta.BBox]: - """Convert this Raven annotation to a ``list`` of ``crowsetta.Bbox``. + """Convert this Raven annotation to a + :class:`list` of :class:`crowsetta.BBox` instances. Returns ------- bboxes : list - of ``crowsetta.BBox`` + A :class:`list` of :class:`crowsetta.BBox` instances. Examples -------- @@ -141,7 +142,8 @@ def to_bbox(self) -> List[crowsetta.BBox]: return bboxes def to_annot(self) -> crowsetta.Annotation: - """Convert this Raven annotation to a ``crowsetta.Annotation``. + """Convert this Raven annotation to a + :class:`crowsetta.Annotation`. Returns ------- @@ -157,13 +159,13 @@ def to_annot(self) -> crowsetta.Annotation: return crowsetta.Annotation(annot_path=self.annot_path, notated_path=self.audio_path, bboxes=bboxes) def to_file(self, annot_path: PathLike) -> None: - """make a .txt file that can be read by Raven + """Make a txt file that can be read by Raven from this annotation Parameters ---------- annot_path : str, pahtlib.Path - path including filename where file should be saved. + Path including filename where file should be saved. 
Must have extension '.txt' """ crowsetta.validation.validate_ext(annot_path, extension=self.ext) diff --git a/src/crowsetta/formats/seq/audseq.py b/src/crowsetta/formats/seq/audseq.py index d701c39..76df653 100644 --- a/src/crowsetta/formats/seq/audseq.py +++ b/src/crowsetta/formats/seq/audseq.py @@ -1,5 +1,5 @@ """module for Audacity LabelTrack -in standard/default format exported to .txt files +in standard/default format exported to txt files https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Standard_.28default.29_format """ import pathlib @@ -16,9 +16,12 @@ class AudSeqSchema(pandera.SchemaModel): - """A ``pandera.SchemaModel`` that validates ``pandas`` dataframes + """A :class:`pandera.SchemaModel` + that validates :class:`pandas.DataFrame` instances loaded from Audacity Labeltrack annotations - exported to .txt files in the standard format + exported to txt files in the standard format. + + The standard format is described here: https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Standard_.28default.29_format """ @@ -36,10 +39,9 @@ class Config: class AudSeq: """Class meant to represent Audacity Labeltrack annotations - exported to .txt files in the standard format - https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Standard_.28default.29_format + exported to txt files in the standard format [1]_. - The .txt file will have 3 tab-separated columns + The txt file will have 3 tab-separated columns that represent the start time, end time, and labels of annotated regions. @@ -48,25 +50,28 @@ class AudSeq: name: str Shorthand name for annotation format: ``'aud-seq'``. ext: str - Extension of files in annotation format: - ``'.txt'`` + Extension of files in annotation format, ``'.txt'``. start_times : numpy.ndarray Vector of integer sample numbers corresponding - to beginning of segments, i.e. onsets + to beginning of segments, i.e. onsets. 
end_times : numpy.ndarray Vector of integer sample numbers corresponding - to ends of segments, i.e. offsets + to ends of segments, i.e. offsets. labels : numpy.ndarray Vector of string labels for segments; each element is either a single word, or a single phonetic transcription code. annot_path : str, pathlib.Path Path to file from which annotations were loaded. - notated_path : str. pathlib.Path - path to file that ``annot_path`` annotates. + notated_path : str, pathlib.Path + Path to file that ``annot_path`` annotates. E.g., an audio file, or an array file that contains a spectrogram generated from audio. Optional, default is None. + + References + ---------- + .. [1] https://manual.audacityteam.org/man/importing_and_exporting_labels.html#Standard_.28default.29_format """ name: ClassVar[str] = "aud-seq" @@ -84,7 +89,7 @@ def from_file( annot_path: PathLike, notated_path: Optional[PathLike] = None, ) -> "Self": # noqa: F821 - """Load annotations from a file + """Load annotations from a file. Parameters ---------- @@ -92,7 +97,7 @@ def from_file( Path to an annotation file, with '.txt' extension. notated_path : str, pathlib.Path - path to file that ``annot_path`` annotates. + Path to file that ``annot_path`` annotates. E.g., an audio file, or an array file that contains a spectrogram generated from audio. Optional, default is None. @@ -117,12 +122,12 @@ def from_file( ) def to_seq(self, round_times: bool = True, decimals: int = 3) -> crowsetta.Sequence: - """Convert this annotation to a ``crowsetta.Sequence``. + """Convert this annotation to a :class:`crowsetta.Sequence`. Parameters ---------- round_times : bool - If True, round onsets_s and offsets_s. + If True, round ``onsets_s`` and ``offsets_s``. Default is True. decimals : int Number of decimals places to round floating point numbers to. 
@@ -158,7 +163,7 @@ def to_seq(self, round_times: bool = True, decimals: int = 3) -> crowsetta.Seque return seq def to_annot(self, round_times: bool = True, decimals: int = 3) -> crowsetta.Annotation: - """Convert this annotation to a ``crowsetta.Annotation``. + """Convert this annotation to a :class:`crowsetta.Annotation`. Parameters ---------- @@ -192,13 +197,13 @@ def to_annot(self, round_times: bool = True, decimals: int = 3) -> crowsetta.Ann return crowsetta.Annotation(annot_path=self.annot_path, notated_path=self.notated_path, seq=seq) def to_file(self, annot_path: PathLike) -> None: - """save this 'aud-seq' annotation to a .txt file - in the standard/default Audacity LabelTrack format + """Save this 'aud-seq' annotation to a txt file + in the standard/default Audacity LabelTrack format. Parameters ---------- annot_path : str, pathlib.Path - Path with filename of .csv file that should be saved. + Path with filename of txt file that should be saved. """ df = pd.DataFrame.from_records( {"start_time": self.start_times, "end_time": self.end_times, "label": self.labels} @@ -208,6 +213,6 @@ def to_file(self, annot_path: PathLike) -> None: df = AudSeqSchema.validate(df) except pandera.errors.SchemaError as e: raise ValueError( - f"Annotations produced an invalid dataframe, " f"cannot convert to Audacity LabelTrack .txt file:\n{df}" + f"Annotations produced an invalid dataframe, " f"cannot convert to Audacity LabelTrack txt file:\n{df}" ) from e df.to_csv(annot_path, sep="\t", header=False, index=False) diff --git a/src/crowsetta/formats/seq/birdsongrec.py b/src/crowsetta/formats/seq/birdsongrec.py index 2f136b4..9d1a9c7 100644 --- a/src/crowsetta/formats/seq/birdsongrec.py +++ b/src/crowsetta/formats/seq/birdsongrec.py @@ -1,4 +1,4 @@ -"""module with functions that handle the following dataset: +"""Module with functions that handle the following dataset: Koumura, T. (2016). BirdsongRecognition (Version 1). figshare. 
https://doi.org/10.6084/m9.figshare.3470165.v1 https://figshare.com/articles/BirdsongRecognition/3470165 @@ -35,7 +35,7 @@ class BirdsongRec: ext: str Extension of files in annotation format: ``'.xml'``. sequences: list - List of ``birdsongrec.Sequence`` instances. + List of :class:`birdsongrec.Sequence` instances. annot_path: pathlib.Path Path to file from which annotations were loaded. Typically with filename 'Annotation.xml'. @@ -70,14 +70,14 @@ class BirdsongRec: References ---------- - [1] Koumura, T. (2016). BirdsongRecognition (Version 1). figshare. - https://doi.org/10.6084/m9.figshare.3470165.v1 - https://figshare.com/articles/BirdsongRecognition/3470165 - - [2] Koumura T., Okanoya K. (2016) Automatic Recognition of Element Classes and - Boundaries in the Birdsong with Variable Sequences. PLoS ONE 11(7): e0159188. - https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0159188 - doi:10.1371/journal.pone.0159188 + .. [1] Koumura, T. (2016). BirdsongRecognition (Version 1). figshare. + https://doi.org/10.6084/m9.figshare.3470165.v1 + https://figshare.com/articles/BirdsongRecognition/3470165 + + .. [2] Koumura T., Okanoya K. (2016) Automatic Recognition of Element Classes and + Boundaries in the Birdsong with Variable Sequences. PLoS ONE 11(7): e0159188. + https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0159188 + doi:10.1371/journal.pone.0159188 """ name: ClassVar[str] = "birdsong-recognition-dataset" @@ -96,11 +96,11 @@ def from_file( Parameters ---------- annot_path : str, pathlib.Path - Path to .xml file from BirdsongRecognition dataset + Path to xml file from BirdsongRecognition dataset that contains annotations. wav_path : str, pathlib.Path - Path in which .wav files listed in Annotation.xml file are found. - Defaults to a directory `Wave` that is located in the parent directory of + Path in which wav files listed in Annotation.xml file are found. 
+ Defaults to a directory ``Wave`` that is located in the parent directory of the Annotation.xml file, which matches the structure of the dataset from [1]_. .. code-block:: console @@ -114,13 +114,17 @@ def from_file( ... concat_seqs_into_songs : bool - If True, concatenate sequences from xml_file, so that + If True, concatenate sequences from ``annot_path``, so that one sequence = one song / .wav file. Default is True. Examples -------- >>> example = crowsetta.data.get('birdsong-recognition-dataset') >>> birdsongrec = crowsetta.formats.seq.BirdsongRec.from_file(example.annot_path) + + .. [1] Koumura, T. (2016). BirdsongRecognition (Version 1). figshare. + https://doi.org/10.6084/m9.figshare.3470165.v1 + https://figshare.com/articles/BirdsongRecognition/3470165 """ annot_path = pathlib.Path(annot_path) crowsetta.validation.validate_ext(annot_path, extension=cls.ext) @@ -141,7 +145,7 @@ def to_seq( self, round_times: bool = True, decimals: int = 3, samplerate: Optional[int] = None ) -> List[crowsetta.Sequence]: """Convert this set of ``'birdsong-recognition-dataset'`` - annotations to a list of ``crowsetta.Sequence`` instances. + annotations to a list of :class:`crowsetta.Sequence` instances. Parameters ---------- @@ -160,12 +164,12 @@ def to_seq( tries to open each .wav file and determine the actual sampling rate. If this does not work, then the ``onsets_s`` and ``offsets_s`` attributes - of the ``crowsetta.Sequence`` are left as None. + of the :class:`crowsetta.Sequence` are left as None. Returns ------- seqs : list - List of ``crowsetta.Sequence``s. + A :class:`list` of :class:`crowsetta.Sequence` instances. 
Examples -------- @@ -242,7 +246,7 @@ def to_annot( self, round_times: bool = True, decimals: int = 3, samplerate: Optional[int] = None ) -> List[crowsetta.Annotation]: """Convert this set of ``'birdsong-recognition-dataset'`` - annotations to a list of ``crowsetta.Annotation`` instances + annotations to a :class:`list` of :class:`crowsetta.Annotation` instances. Parameters ---------- @@ -261,12 +265,12 @@ def to_annot( tries to open each .wav file and determine the actual sampling rate. If this does not work, then the ``onsets_s`` and ``offsets_s`` attributes - of the ``crowsetta.Sequence`` are left as None. + of the :class:`crowsetta.Sequence` are left as None. Returns ------- annots : list - Of ``crowsetta.Annotation``. + A list of :class:`crowsetta.Annotation` instances. Examples -------- diff --git a/src/crowsetta/formats/seq/generic.py b/src/crowsetta/formats/seq/generic.py index f242a7c..5164734 100644 --- a/src/crowsetta/formats/seq/generic.py +++ b/src/crowsetta/formats/seq/generic.py @@ -1,17 +1,18 @@ """ -generic format, +Generic sequence format, meant to be an abstraction of any sequence-like format. -Consists of ``Annotation``s, -each with a ``Sequence`` made up -of ``Segment``s. +Consists of :class:`crowestta.Annotation` +instances, each with a :class:`crowsetta.Sequence` +made up of :class:`crowsetta.Segment`s. Functions in this module -load the format from a .csv, -or write a .csv in the generic format. -Other formats that convert to ``Annotation``s -with ``Sequence``s can be converted +load the format from a csv file, +or write a csv file in the generic format. +Other formats that convert to +:class:`~crowsetta.Annotation`s +with :class:`~crowsetta.Sequence`s can be converted to this format. 
""" import os @@ -35,8 +36,9 @@ class GenericSeqSchema(pandera.SchemaModel): - """``pandera.SchemaModel`` that validates ``pandas`` dataframes - loaded from a .csv file in the ``'generic-seq'`` annotation + """A :class:`pandera.SchemaModel` that validates + :class:`pandas.DataFrame` instances + loaded from a csv file in the ``'generic-seq'`` annotation format. """ @@ -89,20 +91,20 @@ class Config: def annot2df( annot: Union[crowsetta.Annotation, List[crowsetta.Annotation]], abspath: bool = False, basename: bool = False ) -> pd.DataFrame: - """Convert sequence-like ``crowsetta.Annotation`` - to a ``pandas.DataFrame`` in the ``'generic-seq'`` format. + """Convert sequence-like :class:`crowsetta.Annotation` + to a :class:`pandas.DataFrame` in the ``'generic-seq'`` format. Parameters ---------- annot : crowsetta.Annotation, or list of Annotations csv_path : str, pathlib.Path - Path including filename of .csv to write to, + Path including filename of csv file to write to, will be created (or overwritten if it exists already) abspath : bool - if True, converts filename for each audio file into absolute path. + If True, converts filename for each audio file into absolute path. Default is False. basename : bool - if True, discard any information about path and just use file name. + If True, discard any information about path and just use file name. Default is False. Notes @@ -111,7 +113,7 @@ def annot2df( These options are useful when working with multiple copies of files, and for reproducibility (so you know which copy of a file you were working with). Default for both is False, in which case the filename is saved just as it is passed to - this function in a Sequence object. + this function in a :class:`crowsetta.Sequence` object. 
""" if not (isinstance(annot, crowsetta.Annotation) or isinstance(annot, list)): raise TypeError("annot must be Annotation or list of Annotations, " f"not type {type(annot)})") @@ -177,20 +179,20 @@ def annot2csv( abspath: bool = False, basename: bool = False, ) -> None: - """write sequence-like ``crowsetta.Annotation`` - to a .csv file in the ``'generic-seq'`` format + """Write sequence-like :class:`crowsetta.Annotation` + to a csv file in the ``'generic-seq'`` format Parameters ---------- annot : crowsetta.Annotation, or list of Annotations csv_path : str, pathlib.Path - path including filename of .csv to write to, + Path including filename of csv file to write to, will be created (or overwritten if it exists already) abspath : bool - if True, converts filename for each audio file into absolute path. + If True, converts filename for each audio file into absolute path. Default is False. basename : bool - if True, discard any information about path and just use file name. + If True, discard any information about path and just use file name. Default is False. Notes @@ -206,19 +208,20 @@ def annot2csv( def csv2annot(csv_path: PathLike) -> List[crowsetta.Annotation]: - """loads a comma-separated values (csv) file containing annotations - for song files, returns contents as a list of Annotation objects + """Loads a comma-separated values (csv) file containing annotations + for song files, returns contents as a + :class:`list` of :class:`crowsetta.Annotation` instances. Parameters ---------- csv_path : str, pathlib.Path - Path to .csv file containing annotations + Path to csv file containing annotations saved in the ``'generic-seq'`` format. Returns ------- annot_list : list - list of Annotations + A :class:`list` of :class:`crowsetta.Annotation` instances. 
""" df = pd.read_csv(csv_path) GenericSeqSchema.validate(df) @@ -278,27 +281,26 @@ def csv2annot(csv_path: PathLike) -> List[crowsetta.Annotation]: @crowsetta.interface.SeqLike.register @attr.define class GenericSeq: - """ - class that represents annotations from a generic format, + """Class that represents annotations from a generic format, meant to be an abstraction of any sequence-like format. - Consists of ``Annotation``s, - each with a ``Sequence`` made up - of ``Segment``s. + Consists of :class:`crowsetta.Annotation` instances, + each with a :class:`crowsetta.Sequence` made up + of :class:`crowsetta.Segment` instances. - Other formats that convert to ``Annotation``s - with ``Sequence``s can be converted + Other formats that convert to :class:`~crowsetta.Annotation` instances + with :class:`~crowsetta.Sequence` instances can be converted to this format. Attributes ---------- name: str - shorthand name for annotation format: ``'generic-seq'`` + Shorthand name for annotation format: ``'generic-seq'`` ext: str - extension of files in annotation format: ``'.csv'`` + Extension of files in annotation format: ``'.csv'`` annots : list - of ``crowsetta.Annotation`` instances + A :class:`list` of :class:`crowsetta.Annotation` instances. """ name: ClassVar[str] = "generic-seq" @@ -308,12 +310,12 @@ class that represents annotations from a generic format, @classmethod def from_file(cls, annot_path: PathLike) -> "Self": # noqa: F821 - """load annotations in 'generic-seq' format from a .csv file + """Load annotations in 'generic-seq' format from a csv file. Parameters ---------- annot_path : str, pathlib.Path - Path to .csv file containing annotations + Path to csv file containing annotations saved in the ``'generic-seq'`` format. 
Examples @@ -324,8 +326,8 @@ def from_file(cls, annot_path: PathLike) -> "Self": # noqa: F821 return cls(annots=annots) def to_seq(self) -> List[crowsetta.Sequence]: - """return a list of ``crowsetta.Sequence``, - one for every annotation + """Return a :class:`list` of :class:`crowsetta.Sequence` instances, + one for every annotation. Examples -------- @@ -336,12 +338,12 @@ def to_seq(self) -> List[crowsetta.Sequence]: return [annot.seq for annot in self.annots] def to_annot(self) -> List[crowsetta.Annotation]: - """returns these ``crowsetta.Annotation`` instances - as a list + """Returns this set of :class:`crowsetta.Annotation` instances + as a :class:`list`. - This is the same as accessing the list of ``Annotation`` - instances directly. It is implemented so that this class - conforms with the ``SeqLike`` interface. + This is the same as accessing the :class:`list` of :class:`crowsetta.Annotation` + instances directly. The method is implemented so that this class + conforms with the :class:`crowsetta.interface.seq.SeqLike` interface. Examples -------- @@ -352,8 +354,7 @@ def to_annot(self) -> List[crowsetta.Annotation]: return self.annots def to_df(self, abspath: bool = False, basename: bool = False) -> pd.DataFrame: - """Convert these annotations to a - ``pandas.DataFrame`` + """Convert these annotations to a :class:`pandas.DataFrame`. abspath : bool If True, converts filename for each audio file into absolute path. @@ -365,13 +366,13 @@ def to_df(self, abspath: bool = False, basename: bool = False) -> pd.DataFrame: return annot2df(self.annots, abspath, basename) def to_file(self, annot_path: PathLike, abspath: bool = False, basename: bool = False) -> None: - """Write these annotations to a .csv file + """Write these annotations to a csv file in ``'generic-seq'`` format. 
Parameters ---------- annot_path : str, pathlib.Path - Path including filename of .csv to write to, + Path including filename of csv file to write to, will be created (or overwritten if it exists already) abspath : bool If True, converts filename for each audio file into absolute path. diff --git a/src/crowsetta/formats/seq/notmat.py b/src/crowsetta/formats/seq/notmat.py index 488dcb0..cc9d4f2 100644 --- a/src/crowsetta/formats/seq/notmat.py +++ b/src/crowsetta/formats/seq/notmat.py @@ -1,5 +1,5 @@ -"""module with functions that handle .not.mat annotation files -produced by evsonganaly GUI +"""Module with functions that handle .not.mat annotation files +produced by evsonganaly GUI. """ import pathlib from typing import ClassVar, Dict, Optional @@ -81,7 +81,7 @@ def from_file(cls, annot_path: PathLike) -> "Self": # noqa: F821 return cls(annot_path=annot_path, onsets=onsets, offsets=offsets, labels=labels, audio_path=audio_path) def to_seq(self, round_times: bool = True, decimals: int = 3) -> crowsetta.Sequence: - """Convert this .not.mat annotation to a ``crowsetta.Sequence``. + """Convert this .not.mat annotation to a :class:`crowsetta.Sequence`. Parameters ---------- @@ -122,7 +122,7 @@ def to_seq(self, round_times: bool = True, decimals: int = 3) -> crowsetta.Seque return seq def to_annot(self, round_times: bool = True, decimals: int = 3) -> crowsetta.Annotation: - """Convert this .not.mat annotation to a ``crowsetta.Annotation``. + """Convert this .not.mat annotation to a :class:`crowsetta.Annotation`. Parameters ---------- @@ -175,11 +175,14 @@ def to_file( samp_freq : int Sampling frequency of audio file. threshold : int - Value above which amplitude is considered part of a segment. default is 5000. + Value above which amplitude is considered part of a segment. + Default is 5000. min_syl_dur : float - Minimum duration of a segment. default is 0.02, i.e. 20 ms. + Minimum duration of a segment. + Default is 0.02, i.e. 20 ms. 
min_silent_dur : float - Minimum duration of silent gap between segment. default is 0.002, i.e. 2 ms. + Minimum duration of silent gap between segment. + Default is 0.002, i.e. 2 ms. fname : str, pathlib.Path Name of audio file associated with .not.mat, will be used as base of name for .not.mat file. diff --git a/src/crowsetta/formats/seq/simple.py b/src/crowsetta/formats/seq/simple.py index 450027a..6d2cf75 100644 --- a/src/crowsetta/formats/seq/simple.py +++ b/src/crowsetta/formats/seq/simple.py @@ -1,8 +1,8 @@ -"""module with functions meant to handle +"""Module with functions meant to handle any simple sequence-like annotation format. -Can be a .csv or .txt file, -should have 3 columns that represent +The annotations can be a csv or txt file; +the format should have 3 columns that represent the onset and offset times in seconds and the labels of the segments in the annotated sequences. @@ -31,10 +31,11 @@ class SimpleSeqSchema(pandera.SchemaModel): - """A ``pandera.SchemaModel`` that validates ``pandas`` dataframes - loaded from a .csv or .txt file in a 'simple-seq' format. + """A :class:`pandera.SchemaModel` + that validates :type:`pandas.DataFrame`s + loaded from a csv or txt file in a 'simple-seq' format. - The ``SimpleSeq.from_file`` loads the ``pandas.DataFrame`` + The :meth:`SimpleSeq.from_file` loads the :type:`pandas.DataFrame` and makes any changes needed to get it to this format before validation, e.g., changing column names. """ @@ -53,10 +54,10 @@ class Config: class SimpleSeq: """Class meant to represent any simple sequence-like annotation format. - Can be a .csv or .txt file; - should have 3 columns that represent - the onset time, offset time, - and labels of the segments + The annotations can be a csv or txt file; + the format should have 3 columns that represent + the onset and offset times in seconds + and the labels of the segments in the annotated sequences. 
The default is to assume @@ -113,10 +114,10 @@ def from_file( """Load annotations from a file in the 'simple-seq' format. - Can be a .csv or .txt file; - should have 3 columns that represent - the onset time, offset time, - and labels of the segments + The annotations can be a csv or txt file; + the format should have 3 columns that represent + the onset and offset times in seconds + and the labels of the segments in the annotated sequences. The default is to assume @@ -135,7 +136,7 @@ def from_file( Path to an annotation file, with one of the extensions {'.csv', '.txt'}. notated_path : str, pathlib.Path - path to file that ``annot_path`` annotates. + Path to file that ``annot_path`` annotates. E.g., an audio file, or an array file that contains a spectrogram generated from audio. Optional, default is None. @@ -147,10 +148,10 @@ def from_file( Optional, default is None--assumes that columns have the standardized names. read_csv_kwargs : dict - keyword arguments passed to - ``pandas.read_csv``. Default is None, + Keyword arguments passed to + :func:`pandas.read_csv`. Default is None, in which case all defaults for - ``pandas.read_csv`` will be used. + :func:`pandas.read_csv` will be used. Examples -------- @@ -183,7 +184,7 @@ def from_file( ) def to_seq(self, round_times: bool = True, decimals: int = 3) -> crowsetta.Sequence: - """Convert this annotation to a ``crowsetta.Sequence``. + """Convert this annotation to a :class:`crowsetta.Sequence`. Parameters ---------- @@ -228,7 +229,7 @@ def to_seq(self, round_times: bool = True, decimals: int = 3) -> crowsetta.Seque return seq def to_annot(self, round_times: bool = True, decimals: int = 3) -> crowsetta.Annotation: - """Convert this annotation to a ``crowsetta.Annotation``. + """Convert this annotation to a :class:`crowsetta.Annotation`. 
Parameters ---------- @@ -266,17 +267,17 @@ def to_annot(self, round_times: bool = True, decimals: int = 3) -> crowsetta.Ann return crowsetta.Annotation(annot_path=self.annot_path, notated_path=self.notated_path, seq=seq) def to_file(self, annot_path: PathLike, to_csv_kwargs: Optional[Mapping] = None) -> None: - """Save this 'simple-seq' annotation to a .csv file. + """Save this 'simple-seq' annotation to a csv file. Parameters ---------- annot_path : str, pathlib.Path - path with filename of .csv file that should be saved + Path with filename of csv file that should be saved to_csv_kwargs : dict-like keyword arguments passed to - ``pandas.DataFrame.to_csv``. + :meth:`pandas.DataFrame.to_csv`. Default is None, in which case - defaults for ``pandas.to_csv`` + defaults for :meth:`pandas.DataFrame.to_csv` will be used, except ``index`` is set to False. """ diff --git a/src/crowsetta/formats/seq/textgrid.py b/src/crowsetta/formats/seq/textgrid.py index b1ff88e..be5168c 100644 --- a/src/crowsetta/formats/seq/textgrid.py +++ b/src/crowsetta/formats/seq/textgrid.py @@ -1,8 +1,8 @@ -"""module with functions for working with Praat TextGrid annotation files +"""Module with functions for working with Praat TextGrid annotation files -uses the Python library textgrid +Uses the Python library ``textgrid``: https://github.com/kylebgorman/textgrid -a version is distributed with this code (../textgrid) under MIT license +A version is distributed with this code (../textgrid) under MIT license. https://github.com/kylebgorman/textgrid/blob/master/LICENSE """ import pathlib @@ -37,23 +37,24 @@ class TextGrid: textgrid : textgrid.TextGrid object that contains annotations from the a '.TextGrid' file. annot_path : str, pathlib.Path - Path to .TextGrid file from which annotations were loaded. + Path to TextGrid file from which annotations were loaded. audio_path : str, pathlib.Path Path to audio file that ``annot_path`` annotates.
Notes ----- - uses the Python library textgrid + Uses the Python library textgrid https://github.com/kylebgorman/textgrid - a version is distributed with this code (../textgrid) under MIT license + + A version is distributed with this code (../textgrid) under MIT license https://github.com/kylebgorman/textgrid/blob/master/LICENSE - This class will load any file that the ``textgrid`` libray can parse, - but it can only convert Praat IntervalTiers to ``crowsetta.Sequence`` and - ``crowsetta.Annotation`` instances. + This class will load any file that the :mod:`~crowsetta._vendor.textgrid` library can parse, + but it can only convert Praat IntervalTiers to :class:`crowsetta.Sequence` and + :class:`crowsetta.Annotation` instances. Additionally, it will only convert a single IntervalTier - (that can be specified when calling ``TextGrid.to_seq`` - or ``TextGrid.to_annot``). + (that can be specified when calling :meth:`crowsetta.formats.seq.TextGrid.to_seq` + or :meth:`crowsetta.formats.seq.TextGrid.to_annot`). """ name: ClassVar[str] = "textgrid" @@ -69,13 +70,13 @@ def from_file( annot_path: PathLike, audio_path: Optional[PathLike] = None, ) -> "Self": # noqa: F821 - """load annotations from .TextGrid file, - like those generated by the Praat application. + """Load annotations from a TextGrid file + in the format used by Praat. Parameters ---------- annot_path: str, pathlib.Path - Path to a .TextGrid file in the format used by Praat. + Path to a TextGrid file in the format used by Praat. audio_path : str. pathlib.Path Path to audio file that the ``annot_path`` annotates. Optional, default is None. @@ -93,16 +94,16 @@ def from_file( return cls(textgrid=tg, annot_path=annot_path, audio_path=audio_path) def to_seq(self, interval_tier: int = 0, round_times: bool = True, decimals: int = 3) -> crowsetta.Sequence: - """Convert an IntervalTier from this .TextGrid annotation - into a ``crowsetta.Sequence``.
+ """Convert an IntervalTier from this TextGrid annotation + into a :class:`crowsetta.Sequence`. Currently, there is only support for converting a single IntervalTier - to a single ``Sequence``. + to a single :class:`~crowsetta.Sequence`. Parameters ---------- interval_tier : int - index of IntervalTier in TextGrid file from which annotations + Index of IntervalTier in TextGrid file from which annotations should be taken. Default is 0, i.e., the first IntervalTier. Necessary in cases where files have multiple IntervalTiers. round_times : bool @@ -128,7 +129,7 @@ def to_seq(self, interval_tier: int = 0, round_times: bool = True, decimals: int The ``round_times`` and ``decimals`` arguments are provided to reduce differences across platforms due to floating point error, e.g. when loading annotation files - and then sending them to a .csv file, + and then sending them to a csv file, the result should be the same on Windows and Linux. """ intv_tier = self.textgrid[interval_tier] @@ -150,11 +151,11 @@ def to_seq(self, interval_tier: int = 0, round_times: bool = True, decimals: int return seq def to_annot(self, interval_tier: int = 0, round_times: bool = True, decimals: int = 3) -> crowsetta.Annotation: - """Convert an IntervalTier from this .TextGrid annotation - to a ``crowsetta.Annotation``. + """Convert an IntervalTier from this TextGrid annotation + to a :class:`crowsetta.Annotation`. Currently, there is only support for converting a single IntervalTier - to an ``Annotation`` with a single ``Sequence``. + to an :class:`~crowsetta.Annotation` with a single :class:`~crowsetta.Sequence`. Parameters ---------- @@ -185,7 +186,7 @@ def to_annot(self, interval_tier: int = 0, round_times: bool = True, decimals: i The ``round_times`` and ``decimals`` arguments are provided to reduce differences across platforms due to floating point error, e.g. 
when loading annotation files - and then sending them to a .csv file, + and then sending them to a csv file, the result should be the same on Windows and Linux. """ seq = self.to_seq(interval_tier=interval_tier, round_times=round_times, decimals=decimals) diff --git a/src/crowsetta/formats/seq/timit.py b/src/crowsetta/formats/seq/timit.py index 816aaff..b48ec83 100644 --- a/src/crowsetta/formats/seq/timit.py +++ b/src/crowsetta/formats/seq/timit.py @@ -1,4 +1,8 @@ -"""module with functions that handle .phn annotation files from the TIMIT dataset +"""Module with functions that handle phn annotation files +from the TIMIT[1]_ dataset. + +.. [1] Garofolo, John S., et al. TIMIT Acoustic-Phonetic Continuous Speech Corpus LDC93S1. + Web Download. Philadelphia: Linguistic Data Consortium, 1993. """ import pathlib import warnings @@ -16,8 +20,13 @@ class TimitTranscriptSchema(pandera.SchemaModel): - """A ``pandera.SchemaModel`` that validates ``pandas`` dataframes - loaded from a .phn or .wrd file in the TIMIT transcription format. + """A :class:`pandera.SchemaModel` that validates :type:`pandas.DataFrame`s + loaded from a phn or wrd file in the TIMIT[1]_ transcription format. + + References + ---------- + .. [1] Garofolo, John S., et al. TIMIT Acoustic-Phonetic Continuous Speech Corpus LDC93S1. + Web Download. Philadelphia: Linguistic Data Consortium, 1993. """ begin_sample: Optional[Series[int]] = pandera.Field() @@ -33,7 +42,7 @@ class Config: @attr.define class Timit: """Class that represents annotations from transcription files in the - DARPA TIMIT Acoustic-Phonetic Continuous Speech Corpus (TIMIT) + DARPA TIMIT Acoustic-Phonetic Continuous Speech Corpus[1]_. Attributes ---------- @@ -56,6 +65,11 @@ class Timit: Path to TIMIT transcription file from which annotations were loaded. audio_path : str. pathlib.Path Path to audio file that the TIMIT transcription file annotates. + + References + ---------- + .. [1] Garofolo, John S., et al. 
TIMIT Acoustic-Phonetic Continuous Speech Corpus LDC93S1. + Web Download. Philadelphia: Linguistic Data Consortium, 1993. """ name: ClassVar[str] = "timit" @@ -69,7 +83,7 @@ class Timit: @classmethod def from_file(cls, annot_path: PathLike, audio_path: Optional[PathLike] = None) -> "Self": # noqa: F821 - """Load annotations from a TIMIT transcription file + """Load annotations from a TIMIT[1]_ transcription file. Parameters ---------- @@ -91,6 +105,11 @@ def from_file(cls, annot_path: PathLike, audio_path: Optional[PathLike] = None) ----- Versions of the dataset exist with the extensions in capital letters. Some platforms may not have case-sensitive paths. + + References + ---------- + .. [1] Garofolo, John S., et al. TIMIT Acoustic-Phonetic Continuous Speech Corpus LDC93S1. + Web Download. Philadelphia: Linguistic Data Consortium, 1993. """ annot_path = pathlib.Path(annot_path) # note multiple extensions, both all-uppercase and all-lowercase `.phn` exist, @@ -122,7 +141,7 @@ def from_file(cls, annot_path: PathLike, audio_path: Optional[PathLike] = None) def to_seq( self, round_times: bool = True, decimals: int = 3, samplerate: Optional[int] = None ) -> crowsetta.Sequence: - """Convert this TIMIT annotation to a ``crowsetta.Sequence``. + """Convert this TIMIT annotation to a :class:`crowsetta.Sequence`. Parameters ---------- @@ -141,7 +160,7 @@ def to_seq( tries to open ``audio_path`` and determine the actual sampling rate. If this does not work, then the ``onsets_s`` and ``offsets_s`` attributes - of the ``crowsetta.Sequence`` are left as None. + of the :class:`crowsetta.Sequence` are left as None. Examples -------- @@ -198,15 +217,15 @@ def to_seq( def to_annot( self, round_times: bool = True, decimals: int = 3, samplerate: Optional[int] = None ) -> crowsetta.Annotation: - """Convert this TIMIT annotation to a ``crowsetta.Annotation``. + """Convert this TIMIT annotation to a :class:`crowsetta.Annotation`. 
Parameters ---------- round_times : bool - if True, round onsets_s and offsets_s. + If True, round onsets_s and offsets_s. Default is True. decimals : int - number of decimals places to round floating point numbers to. + Number of decimals places to round floating point numbers to. Only meaningful if round_times is True. Default is 3, so that times are rounded to milliseconds. samplerate : int @@ -217,7 +236,7 @@ def to_annot( tries to open ``audio_path`` and determine the actual sampling rate. If this does not work, then the ``onsets_s`` and ``offsets_s`` attributes - of the ``crowsetta.Sequence`` are left as None. + of the :class:`crowsetta.Sequence` are left as None. Examples -------- @@ -241,12 +260,13 @@ def to_annot( return crowsetta.Annotation(annot_path=self.annot_path, notated_path=self.audio_path, seq=phn_seq) def to_file(self, annot_path: PathLike) -> None: - """make a .phn file from an annotation + """Make a phn file in the TIMIT format + from this instance. Parameters ---------- annot_path : str, pahtlib.Path - path including filename where file should be saved. + Path including filename where file should be saved. Must have a valid extension for TIMIT transcription files, one of {'.phn', '.PHN', '.wrd', '.WRD'}. """ diff --git a/src/crowsetta/formats/seq/yarden.py b/src/crowsetta/formats/seq/yarden.py index d941850..c7ad399 100644 --- a/src/crowsetta/formats/seq/yarden.py +++ b/src/crowsetta/formats/seq/yarden.py @@ -1,4 +1,4 @@ -"""module for loading annotations from .mat files +"""Module for loading annotations from .mat files created by SongAnnotationGUI: https://github.com/yardencsGitHub/BirdSongBout/tree/master/helpers/GUI """ @@ -32,7 +32,7 @@ def _cast_to_arr(val): def _recursive_stem(path_str): - """helper function that 'recursively' removes file extensions + """Helper function that 'recursively' removes file extensions to recover name of an audio file from the name of an array file i.e. bird1_122213_1534.wav.mat -> i.e. 
bird1_122213_1534.wav @@ -68,13 +68,13 @@ class SongAnnotationGUI: ext: str Extension of files in annotation format: ``'.mat'``. annotations : numpy.ndarray - ``numpy`` record array where each record is an annotation. + A :mod:`numpy` record array where each record is an annotation. audio_paths : numpy.ndarray - ``numpy`` array where each element is a path to an audio file. + A :mod:`numpy` array where each element is a path to an audio file. Same length as ``annotations``. Each element in ``annotations`` is the annotation for the corresponding path in ``audio_paths``. annot_path : str, pathlib.Path - Path to .mat file from which annotations were loaded. + Path to mat file from which annotations were loaded. """ name: ClassVar[str] = "yarden" @@ -86,7 +86,7 @@ class SongAnnotationGUI: @classmethod def from_file(cls, annot_path: PathLike) -> "Self": # noqa: F821 - """load annotations from .mat files + """Load annotations from mat files created by SongAnnotationGUI: https://github.com/yardencsGitHub/BirdSongBout/tree/master/helpers/GUI @@ -112,7 +112,11 @@ def from_file(cls, annot_path: PathLike) -> "Self": # noqa: F821 return cls(annotations=annotations, audio_paths=audio_paths, annot_path=annot_path) def to_seq(self, round_times: bool = True, decimals: int = 3) -> List[crowsetta.Sequence]: - """Convert this .not.mat annotation to a ``crowsetta.Sequence``. + """Convert this set of annotations to a :class:`list` of + :class:`crowsetta.Sequence` instances. + + We assume there is one :class:`~crowsetta.Sequence` + per annotated song in the source annotations. Parameters ---------- @@ -127,7 +131,8 @@ def to_seq(self, round_times: bool = True, decimals: int = 3) -> List[crowsetta. Returns ------- seqs : list - of ``crowsetta.Sequence``, one for each element in ``annotations``. + A :class:`list` of :class:`~crowsetta.Sequence` instances, + one for each element in ``annotations``. 
Notes ----- @@ -166,7 +171,7 @@ def to_seq(self, round_times: bool = True, decimals: int = 3) -> List[crowsetta. return seqs def to_annot(self, round_times: bool = True, decimals: int = 3) -> List[crowsetta.Annotation]: - """Convert this .not.mat annotation to a ``crowsetta.Annotation``. + """Convert this annotation to a :class:`crowsetta.Annotation`. Parameters ---------- @@ -181,7 +186,7 @@ def to_annot(self, round_times: bool = True, decimals: int = 3) -> List[crowsett Returns ------- annots : list - of ``crowsetta.Annotation`` + A :class:`list` of :class:`crowsetta.Annotation` instances. Notes ----- diff --git a/src/crowsetta/interface/base.py b/src/crowsetta/interface/base.py index 4b81609..0a8ae90 100644 --- a/src/crowsetta/interface/base.py +++ b/src/crowsetta/interface/base.py @@ -25,8 +25,8 @@ def from_file(cls) -> "Self": # noqa: F821 @abc.abstractmethod def to_annot(self) -> "Union[crowsetta.Annotation,List[crowsetta.Annotation]]": # noqa: F821 """Converts the instance representing annotations - loaded from a file into a `crowsetta.Annotation` - or a list of `crowsetta.Annotation`s, - that can be used to convert to other formats + loaded from a file into a :class:`crowsetta.Annotation` + or a :class:`list` of :class:`~crowsetta.Annotation` instances, + that can be used to convert to other formats. """ ... diff --git a/src/crowsetta/interface/bbox/base.py b/src/crowsetta/interface/bbox/base.py index 423984d..4cc038f 100644 --- a/src/crowsetta/interface/bbox/base.py +++ b/src/crowsetta/interface/bbox/base.py @@ -12,10 +12,10 @@ class BBoxLike(BaseFormat, abc.ABC): for any annotation format that can be represented as a set of labeled bounding boxes. - In terms of code in ``crowsetta``, + In terms of code in :mod:`crowsetta`, a bounding box-like format is any format that can be represented as a - collection of ``crowsetta.BBox`` instances. + collection of :class:`crowsetta.BBox` instances. The code block below shows some of the features of this data type. .. 
code-block:: python @@ -45,8 +45,8 @@ class BBoxLike(BaseFormat, abc.ABC): def to_bbox(self) -> "Union[crowsetta.BBox, Sequence[crowsetta.BBox]]": # noqa : F821 """Converts the annotation to - a ``crowsetta.BBox`` instance - or a python sequence of - ``crowsetta.BBox`` instances. + a :class:`crowsetta.BBox` instance + or a Python :class:`~typing.Sequence` of + :class:`crowsetta.BBox` instances. """ ... diff --git a/src/crowsetta/interface/seq/base.py b/src/crowsetta/interface/seq/base.py index c5f748e..4019196 100644 --- a/src/crowsetta/interface/seq/base.py +++ b/src/crowsetta/interface/seq/base.py @@ -12,10 +12,10 @@ class SeqLike(BaseFormat, abc.ABC): with each segment having an onset time, offset time, and a label. - In terms of code in ``crowsetta``, + In terms of code in :mod:`crowsetta`, a sequence-like format is any format that can be represented as a - ``crowsetta.Sequence`` made up of ``crowsetta.Segment``s. + :class:`crowsetta.Sequence` made up of :class:`crowsetta.Segment` instances. The code block below shows some of the features of these data types. .. code-block:: python diff --git a/src/crowsetta/segment.py b/src/crowsetta/segment.py index e0e70c6..b20fefe 100644 --- a/src/crowsetta/segment.py +++ b/src/crowsetta/segment.py @@ -6,8 +6,8 @@ def convert_int(val): - """Converter that converts ``numpy.integer`` to ``int``, - returns native Python ``int`` as is, and + """Converter that converts :class:`numpy.integer` to :class:`int`, + returns native Python :class:`int` as is, and raises an error for any other type. """ if hasattr(val, "dtype") and isinstance(val, np.integer): diff --git a/src/crowsetta/sequence.py b/src/crowsetta/sequence.py index 1c50352..a31097d 100644 --- a/src/crowsetta/sequence.py +++ b/src/crowsetta/sequence.py @@ -18,25 +18,29 @@ class Sequence: Attributes ---------- segments : tuple - of Segment objects. + A :class:`tuple` of :class:`crowsetta.Segment` instances. 
onset_samples : numpy.ndarray or None - of type int, onset of each annotated segment in samples/second + Numpy array of type int, onset of each annotated segment in sample number. offset_samples : numpy.ndarray or None - of type int, offset of each annotated segment in samples/second + Numpy array of type int, offset of each annotated segment in sample number. onsets_s : numpy.ndarray or None - of type float, onset of each annotated segment in seconds + Numpy array of type float, onset of each annotated segment in seconds. offsets_s : numpy.ndarray or None - of type float, offset of each annotated segment in seconds + Numpy array of type float, offset of each annotated segment in seconds. labels : str, list, or numpy.ndarray - of type str, label for each annotated segment + Numpy array of type char, label for each annotated segment. Methods ------- - from_segments : make a Sequence from a list of segments - from_keyword : make a Sequence by passing keywords (all arguments except segments) - from_dict : like from_keyword, but pass a Python dictionary where keys are keywords - and values are arguments for those keywords - to_dict : convert to a dict. The inverse of from_dict. + from_segments : method + Make a :class:`~crowsetta.Sequence` from a :class:`list` of :class:`~crowsetta.Segment`s. + from_keyword : method + Make a :class:`~crowsetta.Sequence` by passing keywords (all arguments except segments) + from_dict : method + Like from_keyword, but pass a Python dictionary where keys are keywords + and values are arguments for those keywords. + to_dict : method + Convert to a :class:`dict`. The inverse of :meth:`~crowsetta.Sequence.from_dict`. Examples -------- @@ -50,8 +54,8 @@ class Sequence: >>> labels = np.array(['a', 'a', 'b']) >>> seq = crowsetta.Sequence.from_keyword(labels=labels, onsets_s=onsets_s, offsets_s=offsets_s) - The same sequence could also be made with ``crowsetta.Segment`` instances, - by calling the ``from_segments`` class method. 
+ The same sequence could also be made + by calling the :meth:`~crowsetta.Sequence.from_segments` class method. >>> segments = [] >>> for onset, offset, label in zip(onsets_s, offsets_s, labels): @@ -60,22 +64,22 @@ class Sequence: """ def __init__(self, segments, labels, onsets_s=None, offsets_s=None, onset_samples=None, offset_samples=None): - """Initialize a new ``Sequence`` instance. + """Initialize a new :class:`~crowsetta.Sequence` instance. Parameters ---------- - segments : list or tuple - of Segment objects. + segments : tuple + A :class:`tuple` of :class:`crowsetta.Segment` instances. onset_samples : numpy.ndarray or None - of type int, onset of each annotated segment in samples/second + Numpy array of type int, onset of each annotated segment in sample number. offset_samples : numpy.ndarray or None - of type int, offset of each annotated segment in samples/second + Numpy array of type int, offset of each annotated segment in sample number. onsets_s : numpy.ndarray or None - of type float, onset of each annotated segment in seconds + Numpy array of type float, onset of each annotated segment in seconds. offsets_s : numpy.ndarray or None - of type float, offset of each annotated segment in seconds + Numpy array of type float, offset of each annotated segment in seconds. labels : str, list, or numpy.ndarray - of type str, label for each annotated segment + Numpy array of type char, label for each annotated segment. """ if segments is not None: if type(segments) == Segment: @@ -189,7 +193,7 @@ def _convert_labels(labels): @staticmethod def _validate_segments_type(segments): - """validate that all items in list of segments are Segment""" + """Validate that all items in list of segments are Segment""" if not all([type(seg) == Segment for seg in segments]): raise TypeError( "A Sequence must be made from a list of Segments but not all " "items in the list passed were Segments." 
@@ -197,7 +201,7 @@ def _validate_segments_type(segments): @staticmethod def _validate_onsets_offsets_labels(onsets_s, offsets_s, onset_samples, offset_samples, labels): - """validate onsets, offsets, and labels passed to __init__ or class methods + """Validate onsets, offsets, and labels passed to __init__ or class methods Parameters ---------- @@ -313,17 +317,19 @@ def _validate_onsets_offsets_labels(onsets_s, offsets_s, onset_samples, offset_s @classmethod def from_segments(cls, segments): - """construct a Sequence from a list of Segment objects + """Construct a :class:`crowsetta.Sequence` + from a :class:`list` of :class:`crowsetta.Segment` objects. Parameters ---------- segments : list - Of crowsetta.Segment instances. + A :class:`list` of :class:`crowsetta.Segment` instances. Returns ------- - seq : Sequence - instance of Sequence generated using list of segments + seq : crowsetta.Sequence + A :class:`~crowsetta.Sequence` instance + generated using the :class:`list` of :class:`~crowsetta.Segment`s. """ cls._validate_segments_type(segments) @@ -356,7 +362,7 @@ def from_segments(cls, segments): @classmethod def from_keyword(cls, labels, onset_samples=None, offset_samples=None, onsets_s=None, offsets_s=None): - """construct a Sequence from keyword arguments + """Construct a :class:`crowsetta.Sequence` from keyword arguments Parameters ---------- @@ -371,7 +377,8 @@ def from_keyword(cls, labels, onset_samples=None, offset_samples=None, onsets_s= labels : str, list, or numpy.ndarray of type str, label for each annotated segment - Must specify both onsets and offsets, either in units of Hz or seconds (or both). + Must specify both onsets and offsets, + either in units of samples or seconds (or both).
""" labels = cls._convert_labels(labels) @@ -396,8 +403,9 @@ def from_keyword(cls, labels, onset_samples=None, offset_samples=None, onsets_s= @classmethod def from_dict(cls, seq_dict): - """Returns a Sequence, given a Python dictionary - where keys of dictionary are arguments to Sequence.from_keyword() + """Construct a :class:`crowsetta.Sequence` + from a :class:`dict` where keys + are arguments to :meth:`~crowsetta.Sequence.from_keyword`. Parameters ---------- @@ -414,8 +422,8 @@ def from_dict(cls, seq_dict): labels : str, list, or numpy.ndarray of type str, label for each annotated segment - seq_dict must specify both onsets and offsets, either in units of Hz or seconds - (or both). + ``seq_dict`` must specify both onsets and offsets, + either in units of samples or seconds (or both). Examples -------- @@ -432,7 +440,8 @@ def from_dict(cls, seq_dict): return cls.from_keyword(**seq_dict) def as_dict(self) -> dict: - """Returns sequence as a dictionary + """Convert this :class:`crowsetta.Sequence` + to a :class:`dict`. Returns ------- diff --git a/src/crowsetta/transcriber.py b/src/crowsetta/transcriber.py index 2e715b4..3c95b6a 100644 --- a/src/crowsetta/transcriber.py +++ b/src/crowsetta/transcriber.py @@ -4,31 +4,53 @@ class Transcriber: - """A single class for working with all annotation formats in ``crowsetta``, - making it possible to load multiple files from a single ``Transcriber``, - without needing to know the names of classes that represent formats. - - converts into ``Annotation`` instances + """The :class:`crowsetta.Transcriber` class provides a + way to work with all annotation formats in :mod:`crowsetta`, + without needing to know the names of classes that represent formats + (e.g., :class:`crowsetta.formats.seq.AudSeq` or + :class:`crowsetta.formats.bbox.Raven`.) + + When you make a :class:`~crowsetta.Transcriber` instance, + you specify its `format` as a string name, + one of the names returned by :func:`crowsetta.formats.as_list`. 
+ + You can then use this :class:`~crowsetta.Transcriber` instance + to load multiple annotation files in that ``format``, + by calling the :meth:`~crowsetta.Transcriber.from_file` method + repeatedly, e.g., in a for loop or list comprehension. + This will create multiple instances of the classes that represent + annotation format, one instance for each annotation file. + With method chaining you can convert each loaded file at the same time + to :class:`crowsetta.Annotation`s (the data structure used to work with annotations and convert between formats), and save annotations to comma-separated values (csv) files or other file formats. + See examples below. Attributes ---------- format : str or class - If a string, name of vocal annotation format that ``Transcriber`` will use. - Must be one of ``crowsetta.formats``. - If a class, must be either sequence-like or bounding-box-like, i.e., - registered as either `crowsetta.interface.SeqLike`` or ``crowsetta.interface.BBoxLike``. - + If a string, name of annotation format that the + :class:`~crowsetta.Transcriber` will use. + Must be one of the shorthand string names returned by + :func:`crowsetta.formats.as_list`. + If a class, must be one of the classes in + :mod:`crowsetta.formats` that the shorthand strings refer to. + You can register your own class using + :func:`crowsetta.formats.register_format`. + All format classes must be + either sequence-like or bounding-box-like, i.e., + registered as either + :class:`crowsetta.interface.seq.SeqLike` or + :class:`crowsetta.interface.bbox.BBoxLike`. Methods ------- from_file : Loads annotations from a file Examples -------- - - An example of loading a sequence-like format with the ``from_file`` method. + An example of loading a sequence-like format with the + :meth:`~crowsetta.Transcriber.from_file` method. 
>>> import crowsetta >>> scribe = crowsetta.Transcriber(format='aud-seq') @@ -38,7 +60,8 @@ class Transcriber: >>> annot Annotation(annot_path=PosixPath('/home/pimienta/.local/share/crowsetta/5.0.0rc1/audseq/405_marron1_June_14_2016_69640887.audacity.txt'), notated_path=None, seq=) # noqa - An example of loading a bounding box-like format with the ``from_file`` method. + An example of loading a bounding box-like format with the + :meth:`~crowsetta.Transcriber.from_file` method. Notice this format has a parameter ``annot_col`` we need to specify for it to load correctly. We can pass this additional parameter into the ``from_file`` method as a keyword argument. @@ -50,18 +73,41 @@ class Transcriber: >>> annot = raven.to_annot() >>> annot Annotation(annot_path=PosixPath('/home/pimienta/.local/share/crowsetta/5.0.0rc1/raven/Recording_1_Segment_02.Table.1.selections.txt'), notated_path=None, bboxes=[BBox(onset=154.387792767, offset=154.911598217, low_freq=2878.2, high_freq=4049.0, label='EATO'), BBox(onset=167.526598245, offset=168.17302044, low_freq=2731.9, high_freq=3902.7, label='EATO'), BBox(onset=183.609636834, offset=184.097751553, low_freq=2878.2, high_freq=3975.8, label='EATO'), BBox(onset=250.527480604, offset=251.160710509, low_freq=2756.2, high_freq=3951.4, label='EATO'), BBox(onset=277.88724277, offset=278.480895806, low_freq=2707.5, high_freq=3975.8, label='EATO'), BBox(onset=295.52970757, offset=296.110168316, low_freq=2951.4, high_freq=3975.8, label='EATO')]) # noqa + + An example of loading a set of annotations in the :class:`~crowsetta.formats.seq.NotMat` format, + converting them to :class:`~crowsetta.Annotation` instances at the same time with method chaining, + and then finally saving them as a csv file, + using the :class:`~crowsetta.formats.seq.GenericSeq` format. 
+ + >>> import pathlib + >>> import crowsetta + >>> notmat_paths = sorted(pathlib.Path('./data/bfsongrepo').glob('*.not.mat')) + >>> scribe = crowsetta.Transcriber('notmat') + >>> # next line, use method chaining to load NotMat and convert to crowsetta.Annotation all at once + >>> annots = [scribe.from_file(notmat_path).to_annot() for notmat_path in notmat_paths] + >>> generic_seq = crowsetta.formats.seq.GenericSeq(annots) + >>> generic_seq.to_csv('./data/bfsongrepo/notmats.csv') """ def __init__(self, format: "Union[str, crowsetta.interface.SeqLike, crowsetta.interface.BBoxLike]"): # noqa: F821 - """make a new ``Transcriber`` + """Initialize a new :class:`crowsetta.Transcriber` instance. Parameters ---------- format : str or class - If a string, name of vocal annotation format that ``Transcriber`` will use. - Must be one of ``crowsetta.formats``. - If a class, must be either sequence-like or bounding-box-like, i.e., - registered as either `crowsetta.interface.SeqLike`` or ``crowsetta.interface.BBoxLike``. + If a string, name of annotation format that the + :class:`~crowsetta.Transcriber` will use. + Must be one of the shorthand string names returned by + :func:`crowsetta.formats.as_list`. + If a class, must be one of the classes in + :mod:`crowsetta.formats` that the shorthand strings refer to. + You can register your own class using + :func:`crowsetta.formats.register_format`. + All format classes must be + either sequence-like or bounding-box-like, i.e., + registered as either + :class:`crowsetta.interface.seq.SeqLike` or + :class:`crowsetta.interface.bbox.BBoxLike`. """ # avoid circular imports from . import formats, interface @@ -101,18 +147,19 @@ def from_file( Parameters ---------- annot_path : str, pathlib.Path - path to annotations file + Path to file containing annotations.
Returns ------- annotations : class-instance - an instance of the class referred to by ``self.format``, + An instance of the class referred to by ``self.format``, with annotations loaded from ``annot_path`` Examples -------- - An example of loading a sequence-like format with the ``from_file`` method. + An example of loading a sequence-like format with the + :meth:`~crowsetta.Transcriber.from_file` method. >>> import crowsetta >>> scribe = crowsetta.Transcriber(format='aud-seq') @@ -122,9 +169,12 @@ def from_file( >>> annot Annotation(annot_path=PosixPath('/home/pimienta/.local/share/crowsetta/5.0.0rc1/audseq/405_marron1_June_14_2016_69640887.audacity.txt'), notated_path=None, seq=) # noqa - An example of loading a bounding box-like format with the ``from_file`` method. - Notice this format has a parameter ``annot_col`` we need to specify for it to load correctly. - We can pass this additional parameter into the ``from_file`` method + An example of loading a bounding box-like format with the + :meth:`~crowsetta.Transcriber.from_file` method. + Notice this format has a parameter ``annot_col`` + we need to specify for it to load correctly. + We can pass this additional parameter into the + :meth:`~crowsetta.Transcriber.from_file` method as a keyword argument. 
>>> import crowsetta @@ -134,5 +184,19 @@ def from_file( >>> annot = raven.to_annot() >>> annot Annotation(annot_path=PosixPath('/home/pimienta/.local/share/crowsetta/5.0.0rc1/raven/Recording_1_Segment_02.Table.1.selections.txt'), notated_path=None, bboxes=[BBox(onset=154.387792767, offset=154.911598217, low_freq=2878.2, high_freq=4049.0, label='EATO'), BBox(onset=167.526598245, offset=168.17302044, low_freq=2731.9, high_freq=3902.7, label='EATO'), BBox(onset=183.609636834, offset=184.097751553, low_freq=2878.2, high_freq=3975.8, label='EATO'), BBox(onset=250.527480604, offset=251.160710509, low_freq=2756.2, high_freq=3951.4, label='EATO'), BBox(onset=277.88724277, offset=278.480895806, low_freq=2707.5, high_freq=3975.8, label='EATO'), BBox(onset=295.52970757, offset=296.110168316, low_freq=2951.4, high_freq=3975.8, label='EATO')]) # noqa + + An example of loading a set of annotations in the :class:`~crowsetta.formats.seq.NotMat` format, + converting them to :class:`~crowsetta.Annotation` instances at the same time with method chaining, + and then finally saving them as a csv file, + using the :class:`~crowsetta.formats.seq.GenericSeq` format. 
+ + >>> import pathlib + >>> import crowsetta + >>> notmat_paths = sorted(pathlib.Path('./data/bfsongrepo').glob('*.not.mat')) + >>> scribe = crowsetta.Transcriber('notmat') + >>> # next line, use method chaining to load NotMat and convert to crowsetta.Annotation all at once + >>> annots = [scribe.from_file(notmat_path).to_annot() for notmat_path in notmat_paths] + >>> generic_seq = crowsetta.formats.seq.GenericSeq(annots) + >>> generic_seq.to_csv('./data/bfsongrepo/notmats.csv') """ return self._format_class.from_file(annot_path, *args, **kwargs) diff --git a/src/crowsetta/validation.py b/src/crowsetta/validation.py index c2c5732..1583d15 100644 --- a/src/crowsetta/validation.py +++ b/src/crowsetta/validation.py @@ -5,14 +5,15 @@ """ import numbers from pathlib import PurePath -from typing import Union +from typing import Sequence, Union import numpy as np +import numpy.typing as npt from .typing import PathLike -def _num_samples(x): +def _num_samples(x: npt.ArrayLike) -> int: """Return number of samples in array-like x.""" if not hasattr(x, "__len__") and not hasattr(x, "shape"): if hasattr(x, "__array__"): @@ -32,9 +33,10 @@ def _num_samples(x): return len(x) -def check_consistent_length(arrays): +def check_consistent_length(arrays: Sequence[npt.ArrayLike]) -> None: """Check that all arrays have consistent first dimensions. Checks whether all objects in arrays have the same shape or length. + Parameters ---------- arrays : list or tuple of input objects.
@@ -48,8 +50,9 @@ def check_consistent_length(arrays): ) -def column_or_row_or_1d(y): - """Ravel column or row vector or 1d numpy array, else raises an error +def column_or_row_or_1d(y: npt.NDArray) -> npt.NDArray: + """Ravel column or row vector or 1d numpy array, + else raises an error Parameters ---------- @@ -67,14 +70,14 @@ def column_or_row_or_1d(y): def validate_ext(file: PathLike, extension: Union[str, tuple]) -> None: - """ "check that a file has a valid extension + """Check that a file has a valid extension. Parameters ---------- file : str, pathlib.Path - path to a file; string or pathlib.Path + Path to a file. extension : str, tuple - valid file extension(s). tuple must be tuple of strings. + Valid file extension(s). Tuple must be tuple of strings. Function expects that extensions will be specified with a period, e.g. {'.phn', '.PHN'} """ @@ -90,10 +93,14 @@ def validate_ext(file: PathLike, extension: Union[str, tuple]) -> None: raise TypeError(f"Extension must be str or tuple but type was {type(extension)}") if not (isinstance(file, str) or isinstance(file, PurePath)): - raise TypeError(f"File must be a str or a pathlib.Path, but type of file was {type(file)}.\n" f"File: {file}") + raise TypeError( + f"File must be a str or a pathlib.Path, but type of file was {type(file)}.\n" f"File: {file}" + ) # we need to use `endswith` instead of # e.g. comparing with `pathlib.Path.suffix` # because suffix won't work for "multi-part" extensions like '.not.mat' if not any([str(file).endswith(ext) for ext in extension]): - raise ValueError(f"Invalid extension for file: {file}.\n" f"Valid extension(s): '{extension}'") + raise ValueError( + f"Invalid extension for file: {file}.\n" f"Valid extension(s): '{extension}'" + )