From 7a46cfb27a8b05c1b1dd31e605a25798e077c55e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Thu, 19 Dec 2024 07:33:25 -0800
Subject: [PATCH 01/24] Add dunder methods for the Python Record class

---
 src/python.rs | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
diff --git a/src/python.rs b/src/python.rs
index 35b2822..ad2c0d7 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -1,5 +1,7 @@
 //! Python bindings for needletail
 
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
 use std::io::Cursor;
 
 use pyo3::prelude::*;
@@ -72,6 +74,59 @@ impl Record {
         }
         Ok(())
     }
+
+    pub fn __hash__(&self) -> PyResult<u64> {
+        let mut hasher = DefaultHasher::new();
+        self.id.hash(&mut hasher);
+        self.seq.hash(&mut hasher);
+        if !self.qual.is_none() {
+            self.qual.hash(&mut hasher);
+        }
+        Ok(hasher.finish())
+    }
+
+    pub fn __eq__(&self, other: &Record) -> PyResult<bool> {
+        Ok(self.id == other.id && self.seq == other.seq && self.qual == other.qual)
+    }
+
+    pub fn __len__(&self) -> PyResult<usize> {
+        Ok(self.seq.len())
+    }
+
+    pub fn __str__(&self) -> PyResult<String> {
+        if self.qual.is_none() {
+            let wrapped_seq = self
+                .seq
+                .as_bytes()
+                .chunks(60)
+                .map(|chunk| String::from_utf8_lossy(chunk).to_string())
+                .collect::<Vec<String>>()
+                .join("\n");
+            Ok(format!(">{}\n{}", self.id, wrapped_seq))
+        } else {
+            Ok(format!(
+                "@{}\n{}\n+\n{}",
+                self.id,
+                self.seq,
+                self.qual.clone().unwrap()
+            ))
+        }
+    }
+
+    fn __repr__(&self) -> PyResult<String> {
+        let seq_preview = if self.seq.len() > 40 {
+            let start = &self.seq[..34];
+            let end = &self.seq[self.seq.len() - 3..];
+            format!("{}...{}", start, end)
+        } else {
+            self.seq.clone()
+        };
+        let has_quality = self.qual.is_some();
+        Ok(format!(
+            "Record(id={}, sequence={}, has_quality={})",
+            self.id, seq_preview, has_quality
+        ))
+    }
 }
 
 #[pyclass]

From afe778c99bd5b5b80e4bfa9eadb48e82507d9efc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Thu, 19 Dec 2024 13:13:24 -0800
Subject: [PATCH 02/24] Improve `Record.__repr__()`

---
 src/python.rs | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index ad2c0d7..84478ee 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -1,17 +1,14 @@
 //! Python bindings for needletail
 
-use std::collections::hash_map::DefaultHasher;
-use std::hash::{Hash, Hasher};
-use std::io::Cursor;
-
-use pyo3::prelude::*;
-use pyo3::{create_exception, wrap_pyfunction};
-
 use crate::sequence::{complement, normalize};
 use crate::{
     parse_fastx_file as rs_parse_fastx_file, parse_fastx_reader, parser::SequenceRecord,
     FastxReader,
 };
+use pyo3::prelude::*;
+use pyo3::{create_exception, wrap_pyfunction};
+use std::hash::{DefaultHasher, Hash, Hasher};
+use std::io::Cursor;
 
 create_exception!(needletail, NeedletailError, pyo3::exceptions::PyException);
 
@@ -79,8 +76,9 @@ impl Record {
         let mut hasher = DefaultHasher::new();
         self.id.hash(&mut hasher);
         self.seq.hash(&mut hasher);
-        if !self.qual.is_none() {
-            self.qual.hash(&mut hasher);
+        match &self.qual {
+            Some(qual) => qual.hash(&mut hasher),
+            None => {}
         }
         Ok(hasher.finish())
     }
@@ -114,17 +112,28 @@ impl Record {
     }
 
     fn __repr__(&self) -> PyResult<String> {
-        let seq_preview = if self.seq.len() > 40 {
-            let start = &self.seq[..34];
+        let seq_preview = if self.seq.len() > 30 {
+            let start = &self.seq[..26];
             let end = &self.seq[self.seq.len() - 3..];
-            format!("{}...{}", start, end)
+            format!("{}…{}", start, end)
         } else {
             self.seq.clone()
         };
-        let has_quality = self.qual.is_some();
+        let quality_preview = match &self.qual {
+            Some(qual) => {
+                if qual.len() > 30 {
+                    let start = &qual[..26];
+                    let end = &qual[qual.len() - 3..];
+                    format!("{}…{}", start, end)
+                } else {
+                    qual.clone()
+                }
+            }
+            None => "None".to_string(),
+        };
         Ok(format!(
-            "Record(id={}, sequence={}, has_quality={})",
-            self.id, seq_preview, has_quality
+            "Record(id={}, sequence={}, quality={})",
+            self.id, seq_preview, quality_preview
         ))
     }
 }

From 19a604075c17f893b078ed8d5cd312714a2af4cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Thu, 19 Dec 2024 13:23:58 -0800
Subject: [PATCH 03/24] Add a constructor to `Record`

---
 src/python.rs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/python.rs b/src/python.rs
index 84478ee..2a4464f 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -5,6 +5,8 @@ use crate::{
     parse_fastx_file as rs_parse_fastx_file, parse_fastx_reader, parser::SequenceRecord,
     FastxReader,
 };
+
+use pyo3::exceptions::PyValueError;
 use pyo3::prelude::*;
 use pyo3::{create_exception, wrap_pyfunction};
 use std::hash::{DefaultHasher, Hash, Hasher};
@@ -72,6 +74,20 @@ impl Record {
         Ok(())
     }
 
+    #[new]
+    #[pyo3(signature = (id, seq, qual=None))]
+    fn new(id: String, seq: String, qual: Option<String>) -> PyResult<Record> {
+        // If `qual` is not None, check if it has the same length as `seq`
+        if let Some(qual) = &qual {
+            if qual.len() != seq.len() {
+                return Err(PyValueError::new_err(
+                    "Sequence and quality strings must have the same length",
+                ));
+            }
+        }
+        Ok(Record { id, seq, qual })
+    }
+
     pub fn __hash__(&self) -> PyResult<u64> {
         let mut hasher = DefaultHasher::new();
         self.id.hash(&mut hasher);

From 3497b4e09d79ce1fa242fcc09f26ca14235747eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Thu, 19 Dec 2024 13:30:57 -0800
Subject: [PATCH 04/24] Expose the `Record` class

---
 src/python.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python.rs b/src/python.rs
index 2a4464f..92dff78 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -210,11 +210,11 @@ pub fn reverse_complement(seq: &str) -> String {
 #[pymodule]
 fn needletail(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyFastxReader>()?;
+    m.add_class::<Record>()?;
     m.add_wrapped(wrap_pyfunction!(parse_fastx_file))?;
     m.add_wrapped(wrap_pyfunction!(parse_fastx_string))?;
     m.add_wrapped(wrap_pyfunction!(normalize_seq))?;
     m.add_wrapped(wrap_pyfunction!(reverse_complement))?;
     m.add("NeedletailError", py.get_type_bound::<NeedletailError>())?;
-
     Ok(())
 }

From c417c3d19dde0bc44dd658e6f85785bc82e4a57e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Thu, 19 Dec 2024 13:48:56 -0800
Subject: [PATCH 05/24] Turn `is_fasta` and `is_fastq` into properties

---
 src/python.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/python.rs b/src/python.rs
index 92dff78..8669c65 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -59,10 +59,12 @@ impl Record {
 
 #[pymethods]
 impl Record {
+    #[getter]
     pub fn is_fasta(&self) -> PyResult<bool> {
         Ok(self.qual.is_none())
     }
 
+    #[getter]
     pub fn is_fastq(&self) -> PyResult<bool> {
         Ok(self.qual.is_some())
     }

From 786d6b8c20c6e11db47ef3922b30b7f7adc533ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Thu, 19 Dec 2024 16:34:59 -0800
Subject: [PATCH 06/24] Refactor snippet generation

---
 src/python.rs | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index 8669c65..1a4389a 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -26,6 +26,16 @@ pub struct PyFastxReader {
     reader: Box<dyn FastxReader>,
 }
 
+fn get_string_snippet(seq: &str, max_len: usize) -> String {
+    if seq.len() > max_len {
+        let start = &seq[..max_len - 4];
+        let end = &seq[seq.len() - 3..];
+        format!("{}…{}", start, end)
+    } else {
+        seq.to_string()
+    }
+}
+
 #[pymethods]
 impl PyFastxReader {
     fn __repr__(&self) -> PyResult<String> {
@@ -130,28 +140,14 @@ impl Record {
     }
 
     fn __repr__(&self) -> PyResult<String> {
-        let seq_preview = if self.seq.len() > 30 {
-            let start = &self.seq[..26];
-            let end = &self.seq[self.seq.len() - 3..];
-            format!("{}…{}", start, end)
-        } else {
-            self.seq.clone()
-        };
-        let quality_preview = match &self.qual {
-            Some(qual) => {
-                if qual.len() > 30 {
-                    let start = &qual[..26];
-                    let end = &qual[qual.len() - 3..];
-                    format!("{}…{}", start, end)
-                } else {
-                    qual.clone()
-                }
-            }
+        let seq_snippet = get_string_snippet(&self.seq, 30);
+        let quality_snippet = match &self.qual {
+            Some(qual) => get_string_snippet(qual, 30),
             None => "None".to_string(),
         };
         Ok(format!(
             "Record(id={}, sequence={}, quality={})",
-            self.id, seq_preview, quality_preview
+            self.id, seq_snippet, quality_snippet
         ))
     }
 }

From b39ab088d1357b08fe1e93fc68c71dfebeba124a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Thu, 19 Dec 2024 17:19:36 -0800
Subject: [PATCH 07/24] Add the `name` and `description` properties

---
 src/python.rs | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/python.rs b/src/python.rs
index 1a4389a..517d157 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -69,6 +69,24 @@ impl Record {
 
 #[pymethods]
 impl Record {
+    #[getter]
+    pub fn name(&self) -> PyResult<&str> {
+        if let Some(pos) = self.id.find(char::is_whitespace) {
+            Ok(&self.id[..pos])
+        } else {
+            Ok(&self.id)
+        }
+    }
+
+    #[getter]
+    pub fn description(&self) -> PyResult<Option<&str>> {
+        if let Some(pos) = self.id.find(char::is_whitespace) {
+            Ok(Some(&self.id[pos..].trim_start()))
+        } else {
+            Ok(None)
+        }
+    }
+
     #[getter]
     pub fn is_fasta(&self) -> PyResult<bool> {
         Ok(self.qual.is_none())

From 3f348ecf1aac398f2d7ba12392006c35690b08e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Thu, 19 Dec 2024 17:20:23 -0800
Subject: [PATCH 08/24] Condense long strings in Record's __repr__

---
 src/python.rs | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index 517d157..1f5271a 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -26,7 +26,7 @@ pub struct PyFastxReader {
     reader: Box<dyn FastxReader>,
 }
 
-fn get_string_snippet(seq: &str, max_len: usize) -> String {
+fn get_seq_snippet(seq: &str, max_len: usize) -> String {
     if seq.len() > max_len {
         let start = &seq[..max_len - 4];
         let end = &seq[seq.len() - 3..];
@@ -158,14 +158,19 @@ impl Record {
     }
 
     fn __repr__(&self) -> PyResult<String> {
-        let seq_snippet = get_string_snippet(&self.seq, 30);
+        let id_snippet = match self.name() {
+            Ok(name) if name != self.id => format!("{}…", name),
+            Ok(name) => name.to_string(),
+            Err(_) => self.id.clone(),
+        };
+        let seq_snippet = get_seq_snippet(&self.seq, 30);
         let quality_snippet = match &self.qual {
-            Some(qual) => get_string_snippet(qual, 30),
+            Some(qual) => get_seq_snippet(qual, 30),
             None => "None".to_string(),
         };
         Ok(format!(
             "Record(id={}, sequence={}, quality={})",
-            self.id, seq_snippet, quality_snippet
+            id_snippet, seq_snippet, quality_snippet
         ))
     }
 }

From a4687e891cc0d5a13a017a00699752c7c1b5e4dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Thu, 19 Dec 2024 17:25:58 -0800
Subject: [PATCH 09/24] Shorten Record's __repr__

---
 src/python.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index 1f5271a..e52ba58 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -163,13 +163,13 @@ impl Record {
             Ok(name) => name.to_string(),
             Err(_) => self.id.clone(),
         };
-        let seq_snippet = get_seq_snippet(&self.seq, 30);
+        let seq_snippet = get_seq_snippet(&self.seq, 25);
         let quality_snippet = match &self.qual {
-            Some(qual) => get_seq_snippet(qual, 30),
+            Some(qual) => get_seq_snippet(qual, 25),
             None => "None".to_string(),
         };
         Ok(format!(
-            "Record(id={}, sequence={}, quality={})",
+            "Record(id={}, seq={}, qual={})",
             id_snippet, seq_snippet, quality_snippet
         ))
     }

From ce44c50641d39b7091c3e078fdc273d218c55127 Mon Sep 17 00:00:00 2001
From: Antonio Camargo <antoniop.camargo@gmail.com>
Date: Wed, 22 Jan 2025 12:27:29 -0800
Subject: [PATCH 10/24] Do not wrap sequences in __str__

---
 src/python.rs | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index e52ba58..2f94bfe 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -139,14 +139,7 @@ impl Record {
 
     pub fn __str__(&self) -> PyResult<String> {
         if self.qual.is_none() {
-            let wrapped_seq = self
-                .seq
-                .as_bytes()
-                .chunks(60)
-                .map(|chunk| String::from_utf8_lossy(chunk).to_string())
-                .collect::<Vec<String>>()
-                .join("\n");
-            Ok(format!(">{}\n{}", self.id, wrapped_seq))
+            Ok(format!(">{}\n{}", self.id, self.seq))
         } else {
             Ok(format!(
                 "@{}\n{}\n+\n{}",

From 3770636f915ecf6af2c32062e67d0e52af4c3f9b Mon Sep 17 00:00:00 2001
From: Antonio Camargo <antoniop.camargo@gmail.com>
Date: Wed, 22 Jan 2025 12:30:10 -0800
Subject: [PATCH 11/24] Include a newline at the end of FASTX strings

---
 src/python.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index 2f94bfe..dd5ca53 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -139,10 +139,10 @@ impl Record {
 
     pub fn __str__(&self) -> PyResult<String> {
         if self.qual.is_none() {
-            Ok(format!(">{}\n{}", self.id, self.seq))
+            Ok(format!(">{}\n{}\n", self.id, self.seq))
         } else {
             Ok(format!(
-                "@{}\n{}\n+\n{}",
+                "@{}\n{}\n+\n{}\n",
                 self.id,
                 self.seq,
                 self.qual.clone().unwrap()

From 9cdf8e5e3e138ac2101b2294ff52906e8f7fc1b8 Mon Sep 17 00:00:00 2001
From: Antonio Camargo <antoniop.camargo@gmail.com>
Date: Wed, 22 Jan 2025 12:43:05 -0800
Subject: [PATCH 12/24] Make is_fasta and is_fastq regular methods again

---
 src/python.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index dd5ca53..3ea2fb8 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -87,12 +87,10 @@ impl Record {
         }
     }
 
-    #[getter]
     pub fn is_fasta(&self) -> PyResult<bool> {
         Ok(self.qual.is_none())
     }
 
-    #[getter]
     pub fn is_fastq(&self) -> PyResult<bool> {
         Ok(self.qual.is_some())
     }

From 01fe91ea55bfd15b6b25cbda43f1e679c63befb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Wed, 22 Jan 2025 16:33:00 -0800
Subject: [PATCH 13/24] Add docstrings to the Python classes and functions

---
 src/python.rs | 165 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 160 insertions(+), 5 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index 3cb2e2a..a4074a5 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -21,11 +21,6 @@ macro_rules! py_try {
     };
 }
 
-#[pyclass]
-pub struct PyFastxReader {
-    reader: Box<dyn FastxReader>,
-}
-
 fn get_seq_snippet(seq: &str, max_len: usize) -> String {
     if seq.len() > max_len {
         let start = &seq[..max_len - 4];
@@ -36,6 +31,26 @@ fn get_seq_snippet(seq: &str, max_len: usize) -> String {
     }
 }
 
+/// An iterator that yields sequence records.
+///
+/// Yields
+/// ------
+/// Record
+///     A `Record` object representing a sequence record.
+///
+/// See also
+/// --------
+/// parse_fastx_file:
+///     A function to parse sequence records from a FASTA/FASTQ file.
+/// parse_fastx_string:
+///     A function to parse sequence records from a FASTA/FASTQ string.
+/// Record:
+///     A class representing a FASTA/FASTQ sequence record.
+#[pyclass]
+pub struct PyFastxReader {
+    reader: Box<dyn FastxReader>,
+}
+
 #[pymethods]
 impl PyFastxReader {
     fn __repr__(&self) -> PyResult<String> {
@@ -56,6 +71,43 @@ impl PyFastxReader {
     }
 }
 
+/// A record representing a biological sequence.
+///
+/// Parameters
+/// ----------
+/// id : str
+///     The identifier of the sequence record.
+/// seq : str
+///     A string representing the sequence.
+///
+/// Attributes
+/// ----------
+/// id : str
+///     The identifier of the sequence record. In a FASTA file, this is the
+///     string containing all characters (including whitespaces) after the
+///     leading '>' character. In a FASTQ file, this is the string containing
+///     all characters (including whitespaces) after the leading '@' character.
+/// seq : str
+///     A string representing the sequence.
+/// qual : str, optional
+///     A string representing the quality scores of the sequence. If the object
+///     represents a FASTA record, this attribute will be `None`.
+/// name : str
+///     The name of the sequence record. This is the string before the first
+///     whitespace character in the `id` attribute.
+/// description : str, optional
+///     The description of the sequence record. This is the string after the
+///     first whitespace character in the `id` attribute. If the `id` attribute
+///     contains no whitespace characters, this attribute will be `None`.
+///
+/// Methods
+/// -------
+/// is_fasta
+///     Check if the object represents a FASTA record.
+/// is_fastq
+///     Check if the object represents a FASTQ record.
+/// normalize(iupac)
+///     Normalize the sequence stored in the `seq` attribute of the object.
 #[pyclass]
 pub struct Record {
     #[pyo3(get)]
@@ -96,14 +148,31 @@ impl Record {
         }
     }
 
+    /// Check if the object represents a FASTA record.
+    ///
+    /// Returns
+    /// -------
+    /// bool
+    ///     `True` if the record lacks quality information, otherwise `False`.
     pub fn is_fasta(&self) -> PyResult<bool> {
         Ok(self.qual.is_none())
     }
 
+    /// Check if the object represents a FASTQ record.
+    ///
+    /// Returns
+    /// -------
+    /// bool
+    ///     `True` if the record has quality information, otherwise `False`.
     pub fn is_fastq(&self) -> PyResult<bool> {
         Ok(self.qual.is_some())
     }
 
+    /// Normalize the sequence stored in the `seq` attribute of the object.
+    ///
+    /// See also
+    /// --------
+    /// normalize_seq: A function to normalize nucleotide sequence strings.
     pub fn normalize(&mut self, iupac: bool) -> PyResult<()> {
         if let Some(s) = normalize(self.seq.as_bytes(), iupac) {
             self.seq = String::from_utf8_lossy(&s).to_string();
@@ -178,18 +247,93 @@ impl Record {
 // TODO: what would be really nice is to detect the type of pyobject so it would on file object etc
 // not for initial release though
 
+/// An iterator that reads sequence records from a FASTA/FASTQ file.
+///
+/// Parameters
+/// ----------
+/// path : str
+///     The path to a FASTA/FASTQ file.
+///
+/// Returns
+/// -------
+/// PyFastxReader
+///     A `PyFastxReader` iterator that yields `Record` objects representing
+///     sequences from the input file.
+///
+/// Raises
+/// ------
+/// NeedletailError
+///     If an error occurs while reading and parsing the input file.
+///
+/// See also
+/// --------
+/// parse_fastx_string:
+///     A function to parse sequence records from a FASTA/FASTQ string.
+/// PyFastxReader:
+///     A class with instances that are iterators that yield `Record` objects.
 #[pyfunction]
 fn parse_fastx_file(path: &str) -> PyResult<PyFastxReader> {
     let reader = py_try!(rs_parse_fastx_file(path));
     Ok(PyFastxReader { reader })
 }
 
+/// Parse sequence records from a FASTA/FASTQ string.
+///
+/// Parameters
+/// ----------
+/// content : str
+///     A string containing FASTA/FASTQ-formatted sequence records.
+///
+/// Returns
+/// -------
+/// PyFastxReader
+///     A `PyFastxReader` iterator that yields `Record` objects representing
+///     sequences from the input string.
+///
+/// Raises
+/// ------
+/// NeedletailError
+///     If an error occurs while parsing the input string.
+///
+/// See also
+/// --------
+/// parse_fastx_file:
+///     A function to parse sequence records from a FASTA/FASTQ file.
+/// PyFastxReader:
+///     A class with instances that are iterators that yield `Record` objects.
 #[pyfunction]
 fn parse_fastx_string(content: &str) -> PyResult<PyFastxReader> {
     let reader = py_try!(parse_fastx_reader(Cursor::new(content.to_owned())));
     Ok(PyFastxReader { reader })
 }
 
+/// Normalize the sequence string of nucleotide records by:
+/// - Converting lowercase characters to uppercase.
+/// - Removing whitespace and newline characters.
+/// - Replacing 'U' with 'T'.
+/// - Replacing '.' and '~' with '-'.
+/// - Replacing characters not in 'ACGTN-' with 'N'.
+///
+/// Parameters
+/// ----------
+/// seq : str
+///     A string representing a nucleotide sequence.
+/// iupac : bool
+///     If `True`, characters representing nucleotide ambiguity ('B', 'D',
+///     'H', 'V', 'R', 'Y', 'S', 'W', 'K', and 'M', and their lowercase
+///     forms) will not be converted to 'N'. Lowercase characters will still
+///     be converted to uppercase.
+///
+/// Returns
+/// -------
+/// str
+///     The normalized sequence string.
+///
+/// Notes
+/// -----
+///     The `normalize` method is designed for nucleotide sequences only. If
+///     used with protein sequences, it will incorrectly process amino acid
+///     characters as if they were nucleotides.
 #[pyfunction]
 pub fn normalize_seq(seq: &str, iupac: bool) -> PyResult<String> {
     if let Some(s) = normalize(seq.as_bytes(), iupac) {
@@ -199,6 +343,17 @@ pub fn normalize_seq(seq: &str, iupac: bool) -> PyResult<String> {
     }
 }
 
+/// Compute the reverse complement of a nucleotide sequence.
+///
+/// Parameters:
+/// -----------
+/// seq : str
+///     A string representing a nucleotide sequence.
+///
+/// Returns:
+/// --------
+/// str
+///     The reverse complement of the input nucleotide sequence.
 #[pyfunction]
 pub fn reverse_complement(seq: &str) -> String {
     let comp: Vec<u8> = seq

From 78d8a7f1f2f31b033f07e2ddffd43a27095244e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Wed, 22 Jan 2025 16:39:28 -0800
Subject: [PATCH 14/24] Add a to-do list to the beginning of the file

---
 src/python.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index a4074a5..f70fe55 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -1,5 +1,10 @@
 //! Python bindings for needletail
 
+// TODO:
+// - Add support for `pathlib.Path` objects in `parse_fastx_file`.
+// - Make `normalize_seq` and `reverse_complement` functions able to handle
+//  `Record` objects as input.
+
 use crate::sequence::{complement, normalize};
 use crate::{
     parse_fastx_file as rs_parse_fastx_file, parse_fastx_reader, parser::SequenceRecord,
@@ -244,9 +249,6 @@ impl Record {
     }
 }
 
-// TODO: what would be really nice is to detect the type of pyobject so it would on file object etc
-// not for initial release though
-
 /// An iterator that reads sequence records from a FASTA/FASTQ file.
 ///
 /// Parameters

From 7cd2c47e8bcd66e021b6be9d12d1734288f42c9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Wed, 22 Jan 2025 16:41:26 -0800
Subject: [PATCH 15/24] Add item to to-do list

---
 src/python.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/python.rs b/src/python.rs
index f70fe55..e7d5f15 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -1,6 +1,7 @@
 //! Python bindings for needletail
 
 // TODO:
+// - Make the return values of `__repr__` and `__str__` show up as raw strings.
 // - Add support for `pathlib.Path` objects in `parse_fastx_file`.
 // - Make `normalize_seq` and `reverse_complement` functions able to handle
 //  `Record` objects as input.

From b900fb806de57fee5f6d7e1cb00892de3f6de742 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?=
 <apcamargo@users.noreply.github.com>
Date: Thu, 23 Jan 2025 17:46:49 -0800
Subject: [PATCH 16/24] Fix indentation in `normalize_seq` docstring

---
 src/python.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index e7d5f15..a8fddd7 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -334,9 +334,9 @@ fn parse_fastx_string(content: &str) -> PyResult<PyFastxReader> {
 ///
 /// Notes
 /// -----
-///     The `normalize` method is designed for nucleotide sequences only. If
-///     used with protein sequences, it will incorrectly process amino acid
-///     characters as if they were nucleotides.
+/// The `normalize` method is designed for nucleotide sequences only. If
+/// used with protein sequences, it will incorrectly process amino acid
+/// characters as if they were nucleotides.
 #[pyfunction]
 pub fn normalize_seq(seq: &str, iupac: bool) -> PyResult<String> {
     if let Some(s) = normalize(seq.as_bytes(), iupac) {

From eaba5b1bf81dbc61726224c95a29aaf9ce38f285 Mon Sep 17 00:00:00 2001
From: Antonio Camargo <antoniop.camargo@gmail.com>
Date: Thu, 23 Jan 2025 20:27:42 -0800
Subject: [PATCH 17/24] Update `parse_fastx_file` to accept `pathlib.Path`
 objects

---
 src/python.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index a8fddd7..d97f3da 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -2,7 +2,6 @@
 
 // TODO:
 // - Make the return values of `__repr__` and `__str__` show up as raw strings.
-// - Add support for `pathlib.Path` objects in `parse_fastx_file`.
 // - Make `normalize_seq` and `reverse_complement` functions able to handle
 //  `Record` objects as input.
 
@@ -17,6 +16,7 @@ use pyo3::prelude::*;
 use pyo3::{create_exception, wrap_pyfunction};
 use std::hash::{DefaultHasher, Hash, Hasher};
 use std::io::Cursor;
+use std::path::PathBuf;
 
 create_exception!(needletail, NeedletailError, pyo3::exceptions::PyException);
 
@@ -254,7 +254,7 @@ impl Record {
 ///
 /// Parameters
 /// ----------
-/// path : str
+/// path : str or pathlib.Path
 ///     The path to a FASTA/FASTQ file.
 ///
 /// Returns
@@ -275,7 +275,7 @@ impl Record {
 /// PyFastxReader:
 ///     A class with instances that are iterators that yield `Record` objects.
 #[pyfunction]
-fn parse_fastx_file(path: &str) -> PyResult<PyFastxReader> {
+fn parse_fastx_file(path: PathBuf) -> PyResult<PyFastxReader> {
     let reader = py_try!(rs_parse_fastx_file(path));
     Ok(PyFastxReader { reader })
 }

From 4fcf73b161bc344643e044fb1c94132a6ef173e9 Mon Sep 17 00:00:00 2001
From: Antonio Camargo <antoniop.camargo@gmail.com>
Date: Fri, 24 Jan 2025 14:16:50 -0800
Subject: [PATCH 18/24] Set the default value of the iupac parameter to False

---
 src/python.rs | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index d97f3da..7db5dbd 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -179,6 +179,7 @@ impl Record {
     /// See also
     /// --------
     /// normalize_seq: A function to normalize nucleotide sequence strings.
+    #[pyo3(signature = (iupac=false))]
     pub fn normalize(&mut self, iupac: bool) -> PyResult<()> {
         if let Some(s) = normalize(self.seq.as_bytes(), iupac) {
             self.seq = String::from_utf8_lossy(&s).to_string();
@@ -311,17 +312,20 @@ fn parse_fastx_string(content: &str) -> PyResult<PyFastxReader> {
 }
 
 /// Normalize the sequence string of nucleotide records by:
-/// - Converting lowercase characters to uppercase.
-/// - Removing whitespace and newline characters.
-/// - Replacing 'U' with 'T'.
-/// - Replacing '.' and '~' with '-'.
-/// - Replacing characters not in 'ACGTN-' with 'N'.
+///
+///     - Converting lowercase characters to uppercase.
+///     - Removing whitespace and newline characters.
+///     - Replacing 'U' with 'T'.
+///     - Replacing '.' and '~' with '-'.
+///     - Replacing characters not in 'ACGTN-' with 'N', unless `iupac` is set
+///       to `True`, in which case characters representing nucleotide ambiguity
+///       are not replaced.
 ///
 /// Parameters
 /// ----------
 /// seq : str
 ///     A string representing a nucleotide sequence.
-/// iupac : bool
+/// iupac : bool, default: False
 ///     If `True`, characters representing nucleotide ambiguity ('B', 'D',
 ///     'H', 'V', 'R', 'Y', 'S', 'W', 'K', and 'M', and their lowercase
 ///     forms) will not be converted to 'N'. Lowercase characters will still
@@ -338,6 +342,7 @@ fn parse_fastx_string(content: &str) -> PyResult<PyFastxReader> {
 /// used with protein sequences, it will incorrectly process amino acid
 /// characters as if they were nucleotides.
 #[pyfunction]
+#[pyo3(signature = (seq, iupac=false))]
 pub fn normalize_seq(seq: &str, iupac: bool) -> PyResult<String> {
     if let Some(s) = normalize(seq.as_bytes(), iupac) {
         Ok(String::from_utf8_lossy(&s).to_string())

From 89d94f2613bddc0550968ff4d7ead6b52886f803 Mon Sep 17 00:00:00 2001
From: Antonio Camargo <antoniop.camargo@gmail.com>
Date: Fri, 24 Jan 2025 14:20:47 -0800
Subject: [PATCH 19/24] Add new task to to-do

---
 src/python.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/python.rs b/src/python.rs
index 7db5dbd..882e1b1 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -1,6 +1,8 @@
 //! Python bindings for needletail
 
 // TODO:
+// - Add a property to the `Record` class that returns the quality scores as a
+//   list of integers.
 // - Make the return values of `__repr__` and `__str__` show up as raw strings.
 // - Make `normalize_seq` and `reverse_complement` functions able to handle
 //  `Record` objects as input.

From 1e8b7f41dabdccda8c466710ec8aaa90f958c3f3 Mon Sep 17 00:00:00 2001
From: Antonio Camargo <antoniop.camargo@gmail.com>
Date: Fri, 24 Jan 2025 14:56:44 -0800
Subject: [PATCH 20/24] Update Python tests

---
 src/python.rs  |   4 +-
 test_python.py | 186 +++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 153 insertions(+), 37 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index 882e1b1..93a9d1f 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -241,9 +241,9 @@ impl Record {
             Ok(name) => name.to_string(),
             Err(_) => self.id.clone(),
         };
-        let seq_snippet = get_seq_snippet(&self.seq, 25);
+        let seq_snippet = get_seq_snippet(&self.seq, 20);
         let quality_snippet = match &self.qual {
-            Some(qual) => get_seq_snippet(qual, 25),
+            Some(qual) => get_seq_snippet(qual, 20),
             None => "None".to_string(),
         };
         Ok(format!(
diff --git a/test_python.py b/test_python.py
index 6d2bf4d..c154ae5 100644
--- a/test_python.py
+++ b/test_python.py
@@ -1,13 +1,153 @@
 import unittest
+from pathlib import Path
 
-from needletail import parse_fastx_file, parse_fastx_string, NeedletailError, reverse_complement, normalize_seq
-
+from needletail import (
+    NeedletailError,
+    Record,
+    normalize_seq,
+    parse_fastx_file,
+    parse_fastx_string,
+    reverse_complement,
+)
 
 FASTA_FILE = "./tests/data/test.fa"
 FASTQ_FILE = "./tests/specimen/FASTQ/example.fastq"
 
 
-class ParsingTestCase(unittest.TestCase):
+class RecordClassTestCase(unittest.TestCase):
+    def test_fasta_record(self):
+        record = Record("test description", "AGCTGATCGA")
+        self.assertEqual(record.id, "test description")
+        self.assertEqual(record.seq, "AGCTGATCGA")
+        self.assertIsNone(record.qual)
+
+    def test_fastq_record(self):
+        record = Record("test description", "AGCTGATCGA", ";**9;;????")
+        self.assertEqual(record.id, "test description")
+        self.assertEqual(record.seq, "AGCTGATCGA")
+        self.assertEqual(record.qual, ";**9;;????")
+
+    def test_record_properties(self):
+        record = Record("test description", "AGCTGATCGA")
+        self.assertEqual(record.name, "test")
+        self.assertEqual(record.description, "description")
+
+    def test_record_normalize(self):
+        record = Record("test", "AGCTGYrtcga")
+        record.normalize(iupac=True)
+        self.assertEqual(record.seq, "AGCTGYRTCGA")
+        record.normalize()
+        self.assertEqual(record.seq, "AGCTGNNTCGA")
+
+    def test_format_record_method(self):
+        record = Record("test", "AGCTGATCGA")
+        self.assertTrue(record.is_fasta())
+        self.assertFalse(record.is_fastq())
+        record = Record("test", "AGCTGATCGA", ";**9;;????")
+        self.assertFalse(record.is_fasta())
+        self.assertTrue(record.is_fastq())
+
+    def test_record_eq(self):
+        record1 = Record("test", "AGCTGATCGA", ";**9;;????")
+        record2 = Record("test", "AGCTGATCGA", ";**9;;????")
+        record3 = Record("test2", "AGCTGATCGA", ";**9;;????")
+        record4 = Record("test", "TCGATCAGCT", ";**9;;????")
+        record5 = Record("test", "AGCTGATCGA", "????;**9;;")
+        record6 = Record("test", "AGCTGATCGA")
+        self.assertEqual(record1, record2)
+        self.assertNotEqual(record1, record3)
+        self.assertNotEqual(record1, record4)
+        self.assertNotEqual(record1, record5)
+        self.assertNotEqual(record1, record6)
+
+    def test_record_str(self):
+        self.assertEqual(str(Record("test", "AGCTGATCGA")), ">test\nAGCTGATCGA\n")
+        self.assertEqual(
+            str(Record("test", "AGCTGATCGA", ";**9;;????")),
+            "@test\nAGCTGATCGA\n+\n;**9;;????\n",
+        )
+
+    def test_record_repr(self):
+        self.assertEqual(
+            repr(Record("test", "AGCTGATCGAAGCTGATCGAA")),
+            "Record(id=test, seq=AGCTGATCGAAGCTGA…GAA, qual=None)",
+        )
+        self.assertEqual(
+            repr(Record("test", "AGCTGATCGAAGCTGATCGAA", ";**9;;????;**9;;????;")),
+            "Record(id=test, seq=AGCTGATCGAAGCTGA…GAA, qual=;**9;;????;**9;;…??;)",
+        )
+
+    def test_record_len(self):
+        self.assertEqual(len(Record("test", "AGCTGATCGA")), 10)
+
+    def test_record_hash(self):
+        record1 = Record("test", "AGCTGATCGA")
+        record2 = Record("test", "AGCTGATCGA")
+        record3 = Record("test", "AGCTGATCGA", ";**9;;????")
+        record4 = Record("test", "AGCTGATCGA", ";**9;;????")
+        record5 = Record("test", "TCGATCAGCT")
+        record6 = Record("test2", "AGCTGATCGA")
+        record7 = Record("test", "AGCTGATCGA", "????;**9;;")
+        self.assertEqual(hash(record1), hash(record2))  # identical FASTA
+        self.assertNotEqual(hash(record1), hash(record3))  # qual None vs set
+        self.assertNotEqual(hash(record1), hash(record5))  # seq differs
+        self.assertNotEqual(hash(record1), hash(record6))  # id differs
+        self.assertNotEqual(hash(record1), hash(record7))  # None vs other qual
+        self.assertEqual(hash(record3), hash(record4))  # identical FASTQ
+        self.assertNotEqual(hash(record3), hash(record7))  # qual differs
+
+
+class NormalizeTestCase(unittest.TestCase):
+    def test_no_normalization_needed(self):
+        self.assertEqual(normalize_seq("ACGTU", iupac=False), "ACGTT")
+
+    def test_capitalization(self):
+        self.assertEqual(normalize_seq("acgtu", iupac=False), "ACGTT")
+
+    def test_default_parameters(self):
+        self.assertEqual(
+            normalize_seq("BDHVRYSWKM"), normalize_seq("BDHVRYSWKM", iupac=False)
+        )
+
+    def test_iupac_parameter(self):
+        self.assertEqual(normalize_seq("BDHVRYSWKM", iupac=False), "NNNNNNNNNN")
+        self.assertEqual(normalize_seq("BDHVRYSWKM", iupac=True), "BDHVRYSWKM")
+        self.assertEqual(normalize_seq("bdhvryswkm", iupac=True), "BDHVRYSWKM")
+
+    def test_gap_normalization(self):
+        self.assertEqual(normalize_seq("N-N-N-N", iupac=False), "N-N-N-N")
+        self.assertEqual(normalize_seq("N.N.N.N", iupac=False), "N-N-N-N")
+        self.assertEqual(normalize_seq("N~N~N~N", iupac=False), "N-N-N-N")
+
+    def test_whitespace_removal(self):
+        self.assertEqual(normalize_seq("N N N N", iupac=False), "NNNN")
+        self.assertEqual(normalize_seq("N\tN\tN\tN", iupac=False), "NNNN")
+        self.assertEqual(normalize_seq("N\nN\nN\nN", iupac=False), "NNNN")
+        self.assertEqual(normalize_seq("N\rN\rN\rN", iupac=False), "NNNN")
+
+    def test_non_alphabet_characters(self):
+        self.assertEqual(normalize_seq("N!N!N!N", iupac=False), "NNNNNNN")
+        self.assertEqual(normalize_seq("N@N@N@N", iupac=False), "NNNNNNN")
+        self.assertEqual(normalize_seq("N#N#N#N", iupac=False), "NNNNNNN")
+        self.assertEqual(normalize_seq("N$N$N$N", iupac=False), "NNNNNNN")
+        self.assertEqual(normalize_seq("N%N%N%N", iupac=False), "NNNNNNN")
+        self.assertEqual(normalize_seq("N^N^N^N", iupac=False), "NNNNNNN")
+        self.assertEqual(normalize_seq("N&N&N&N", iupac=False), "NNNNNNN")
+        self.assertEqual(normalize_seq("N*N*N*N", iupac=False), "NNNNNNN")
+        self.assertEqual(normalize_seq("N|N|N|N", iupac=False), "NNNNNNN")
+        self.assertEqual(normalize_seq("N9N5N1N", iupac=False), "NNNNNNN")
+
+
+class ReverseComplementTestCase(unittest.TestCase):
+    def test_reverse_complement(self):
+        self.assertEqual(reverse_complement("a"), "t")
+        self.assertEqual(reverse_complement("c"), "g")
+        self.assertEqual(reverse_complement("g"), "c")
+        self.assertEqual(reverse_complement("n"), "n")
+        self.assertEqual(reverse_complement("atcg"), "cgat")
+
+
+class FileParsingTestCase(unittest.TestCase):
     def get_fasta_reader(self):
         return parse_fastx_file(FASTA_FILE)
 
@@ -20,17 +160,10 @@ def test_can_parse_fasta_file(self):
                 self.assertEqual(record.id, "test")
                 self.assertEqual(record.seq, "AGCTGATCGA")
                 self.assertIsNone(record.qual)
-                record.normalize(iupac=False)
-                self.assertEqual(record.seq, "AGCTGATCGA")
-                self.assertTrue(record.is_fasta())
             if i == 1:
                 self.assertEqual(record.id, "test2")
                 self.assertEqual(record.seq, "TAGC")
                 self.assertIsNone(record.qual)
-                record.normalize(iupac=False)
-                self.assertEqual(record.seq, "TAGC")
-                self.assertTrue(record.is_fasta())
-
             self.assertTrue(i <= 1)
 
     def test_can_parse_fastq_file(self):
@@ -39,21 +172,17 @@ def test_can_parse_fastq_file(self):
                 self.assertEqual(record.id, "EAS54_6_R1_2_1_413_324")
                 self.assertEqual(record.seq, "CCCTTCTTGTCTTCAGCGTTTCTCC")
                 self.assertEqual(record.qual, ";;3;;;;;;;;;;;;7;;;;;;;88")
-                record.normalize(iupac=False)
-                self.assertEqual(record.seq, "CCCTTCTTGTCTTCAGCGTTTCTCC")
-                self.assertTrue(record.is_fastq())
             if i == 1:
                 self.assertEqual(record.id, "EAS54_6_R1_2_1_540_792")
                 self.assertEqual(record.seq, "TTGGCAGGCCAAGGCCGATGGATCA")
                 self.assertEqual(record.qual, ";;;;;;;;;;;7;;;;;-;;;3;83")
-                record.normalize(iupac=False)
-                self.assertEqual(record.seq, "TTGGCAGGCCAAGGCCGATGGATCA")
-                self.assertTrue(record.is_fastq())
-
             self.assertTrue(i <= 2)
 
+    def test_pathlib_path_input(self):
+        parse_fastx_file(Path(FASTA_FILE))
+
 
-class ParsingStrTestCase(ParsingTestCase):
+class StrParsingTestCase(FileParsingTestCase):
     def get_fasta_reader(self):
         with open(FASTA_FILE) as f:
             content = f.read()
@@ -64,22 +193,8 @@ def get_fastq_reader(self):
             content = f.read()
             return parse_fastx_string(content)
 
-
-class MiscelleanousTestCase(unittest.TestCase):
-    def test_normalize_seq(self):
-        self.assertEqual(normalize_seq("ACGTU", iupac=False), "ACGTT")
-        self.assertEqual(normalize_seq("acgtu", iupac=False), "ACGTT")
-        self.assertEqual(normalize_seq("N.N-N~N N", iupac=False), "N-N-N-NN")
-        self.assertEqual(normalize_seq("BDHVRYSWKM", iupac=True), "BDHVRYSWKM")
-        self.assertEqual(normalize_seq("bdhvryswkm", iupac=True), "BDHVRYSWKM")
-
-    def test_reverse_complement(self):
-        self.assertEqual(reverse_complement("a"), "t")
-        self.assertEqual(reverse_complement("c"), "g")
-        self.assertEqual(reverse_complement("g"), "c")
-        self.assertEqual(reverse_complement("n"), "n")
-
-        self.assertEqual(reverse_complement("atcg"), "cgat")
+    def test_pathlib_path_input(self):
+        pass
 
 
 class ErroringTestCase(unittest.TestCase):
@@ -92,5 +207,6 @@ def test_invalid_record(self):
             for i, record in enumerate(parse_fastx_string("Not a valid file")):
                 print(i)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     unittest.main()

From 4de47d9053c477d2f16fce4d85a03445b59e2993 Mon Sep 17 00:00:00 2001
From: Antonio Camargo <antoniop.camargo@gmail.com>
Date: Fri, 24 Jan 2025 15:01:39 -0800
Subject: [PATCH 21/24] Fix linting issue

---
 src/python.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/python.rs b/src/python.rs
index 93a9d1f..6888c24 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -315,13 +315,13 @@ fn parse_fastx_string(content: &str) -> PyResult<PyFastxReader> {
 
 /// Normalize the sequence string of nucleotide records by:
 ///
-///     - Converting lowercase characters to uppercase.
-///     - Removing whitespace and newline characters.
-///     - Replacing 'U' with 'T'.
-///     - Replacing '.' and '~' with '-'.
-///     - Replacing characters not in 'ACGTN-' with 'N', unless `iupac` is set
-///       to `True`, in which case characters representing nucleotide ambiguity
-///       are not replaced.
+/// - Converting lowercase characters to uppercase.
+/// - Removing whitespace and newline characters.
+/// - Replacing 'U' with 'T'.
+/// - Replacing '.' and '~' with '-'.
+/// - Replacing characters not in 'ACGTN-' with 'N', unless `iupac` is `True`,
+///   in which case characters representing nucleotide ambiguity are not
+///   replaced.
 ///
 /// Parameters
 /// ----------

From f16c10dffd14d2fd2a45c0176216d2aa7f05369c Mon Sep 17 00:00:00 2001
From: Antonio Camargo <antoniop.camargo@gmail.com>
Date: Mon, 27 Jan 2025 12:40:39 -0800
Subject: [PATCH 22/24] Use assertIsInstance in test_pathlib_path_input

---
 test_python.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test_python.py b/test_python.py
index c154ae5..a8c4aaf 100644
--- a/test_python.py
+++ b/test_python.py
@@ -3,6 +3,7 @@
 
 from needletail import (
     NeedletailError,
+    PyFastxReader,
     Record,
     normalize_seq,
     parse_fastx_file,
@@ -179,7 +180,7 @@ def test_can_parse_fastq_file(self):
             self.assertTrue(i <= 2)
 
     def test_pathlib_path_input(self):
-        parse_fastx_file(Path(FASTA_FILE))
+        self.assertIsInstance(parse_fastx_file(Path(FASTA_FILE)), PyFastxReader)
 
 
 class StrParsingTestCase(FileParsingTestCase):

From 3435cf5e1b2c3f938ca2559b2a8f8e2f3277a176 Mon Sep 17 00:00:00 2001
From: Antonio Camargo <antoniop.camargo@gmail.com>
Date: Mon, 27 Jan 2025 12:40:58 -0800
Subject: [PATCH 23/24] Add __pycache__ to .gitignore

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 1cb70a9..7484571 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,5 @@ Cargo.lock
 venv/
 .DS_Store
 .idea/
-test.py
\ No newline at end of file
+test.py
+__pycache__/

From 94423ff08fdd02861668305d5bf66d079ef0a217 Mon Sep 17 00:00:00 2001
From: Antonio Camargo <antoniop.camargo@gmail.com>
Date: Mon, 27 Jan 2025 12:56:24 -0800
Subject: [PATCH 24/24] Make FileParsingTestCase inherit from
 StrParsingTestCase

---
 test_python.py | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/test_python.py b/test_python.py
index a8c4aaf..f644e50 100644
--- a/test_python.py
+++ b/test_python.py
@@ -3,7 +3,6 @@
 
 from needletail import (
     NeedletailError,
-    PyFastxReader,
     Record,
     normalize_seq,
     parse_fastx_file,
@@ -148,12 +147,16 @@ def test_reverse_complement(self):
         self.assertEqual(reverse_complement("atcg"), "cgat")
 
 
-class FileParsingTestCase(unittest.TestCase):
+class StrParsingTestCase(unittest.TestCase):
     def get_fasta_reader(self):
-        return parse_fastx_file(FASTA_FILE)
+        with open(FASTA_FILE) as f:
+            content = f.read()
+            return parse_fastx_string(content)
 
     def get_fastq_reader(self):
-        return parse_fastx_file(FASTQ_FILE)
+        with open(FASTQ_FILE) as f:
+            content = f.read()
+            return parse_fastx_string(content)
 
     def test_can_parse_fasta_file(self):
         for i, record in enumerate(self.get_fasta_reader()):
@@ -179,23 +182,16 @@ def test_can_parse_fastq_file(self):
                 self.assertEqual(record.qual, ";;;;;;;;;;;7;;;;;-;;;3;83")
             self.assertTrue(i <= 2)
 
-    def test_pathlib_path_input(self):
-        self.assertIsInstance(parse_fastx_file(Path(FASTA_FILE)), PyFastxReader)
-
 
-class StrParsingTestCase(FileParsingTestCase):
+class FileParsingTestCase(StrParsingTestCase):
     def get_fasta_reader(self):
-        with open(FASTA_FILE) as f:
-            content = f.read()
-            return parse_fastx_string(content)
+        return parse_fastx_file(FASTA_FILE)
 
     def get_fastq_reader(self):
-        with open(FASTQ_FILE) as f:
-            content = f.read()
-            return parse_fastx_string(content)
+        return parse_fastx_file(FASTQ_FILE)
 
     def test_pathlib_path_input(self):
-        pass
+        parse_fastx_file(Path(FASTA_FILE))
 
 
 class ErroringTestCase(unittest.TestCase):