Skip to content

Commit

Permalink
feat: Reorganize pyft api and docs
Browse files Browse the repository at this point in the history
  • Loading branch information
mrvollger committed Jul 23, 2023
1 parent 8fbdc7c commit 9da4fa3
Show file tree
Hide file tree
Showing 8 changed files with 115 additions and 76 deletions.
2 changes: 1 addition & 1 deletion py-ft/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
edition = "2021"
name = "pyft"
version = "0.1.4"
version = "0.1.6"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
Expand Down
35 changes: 1 addition & 34 deletions py-ft/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,4 @@
[![Documentation Status](https://readthedocs.org/projects/py-ft/badge/?version=latest)](https://py-ft.readthedocs.io/en/latest/?badge=latest)
[![PyPI version](https://badge.fury.io/py/pyft.svg)](https://badge.fury.io/py/pyft)

`pyft` provides a python API for the rust library [fibertools-rs](https://github.com/fiberseq/fibertools-rs). The inspiration for this API is to make analysis in python easier and faster; therefore, only extraction of data from a fiberseq bam is supported and not writing.

# Install
```bash
pip install pyft
```


# Example
```python
import pyft
# bam file with ~3X coverage of chr20
bam_f = "../tmp.bam"
fiberdata = pyft.FiberdataFetch(bam_f, "chr20", 0, 10_000_000)
for idx, fiber in enumerate(fiberdata):
if idx < 10:
# print some info about the fiber
print(fiber)
# the number of ccs passes
fiber.ec
# the msp start positions
fiber.msp.starts
# print the nuc reference starts
fiber.nuc.reference_starts
# lift query (fiber) positions to reference positions
fiber.lift_query_positions([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# lift reference positions to query (fiber) positions
fiber.lift_reference_positions([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# aligned blocks between query (fiber) and reference
fiber.get_aligned_blocks_as_ranges()
```

# Documentation
The documentation for `pyft` can be found on [readthedocs](https://py-ft.readthedocs.io/en/latest/).
See the documentation for `pyft` on [readthedocs](https://py-ft.readthedocs.io/en/latest/) for more information.
9 changes: 6 additions & 3 deletions py-ft/docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
import os
import sphinx
import re
import sys
import sphinx.ext.autosummary as autosummary
#import sphinx
#import sphinx.ext.autosummary as autosummary
sys.path.insert(0, os.path.abspath("../"))
import pyft

Expand All @@ -25,6 +25,7 @@

extensions = [
"sphinx.ext.autodoc",
"sphinx_autodoc_typehints",
"sphinx.ext.viewcode",
#"sphinx.ext.napoleon",
"sphinx_rtd_theme",
Expand All @@ -43,8 +44,10 @@
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

# html_theme = "alabaster"
#html_theme = "alabaster"
html_theme = "sphinx_rtd_theme"
#html_permalinks_icon = '<span>#</span>'
#html_theme = 'sphinxawesome_theme'
html_static_path = ["_static"]
html_css_files = [
"css/rtd_dark.css",
Expand Down
28 changes: 23 additions & 5 deletions py-ft/docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,35 @@
sphinx-quickstart on Wed Jul 19 20:00:42 2023.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to pyft's documentation!
=================================

.. toctree::
:maxdepth: 2
:caption: Contents:

pyft: python bindings for fibertools-rs
=======================================
.. image:: https://readthedocs.org/projects/py-ft/badge/?version=latest
:target: https://py-ft.readthedocs.io/en/latest/?badge=latest
:alt: Documentation Status

.. image:: https://badge.fury.io/py/pyft.svg
:target: https://badge.fury.io/py/pyft


**pyft** provides a python API for the rust library `fibertools-rs <https://github.com/fiberseq/fibertools-rs>`_. The inspiration for this API is to make analysis in python easier and faster; therefore, only extraction of data from a fiberseq bam is supported and not writing.

Install
=======
.. code-block:: bash
pip install pyft
.. mdinclude:: ../README.md
Example
=======
.. highlight:: python
.. include:: ../example.py
:literal:
.. highlight:: none

API Reference
==================
Expand Down
7 changes: 6 additions & 1 deletion py-ft/docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
m2r2
sphinx_bootstrap_theme
sphinx_bootstrap_theme
sphinx<7.0.0
sphinx-autodoc-typehints
sphinx-rtd-theme
docutils
#sphinxawesome-theme
21 changes: 12 additions & 9 deletions py-ft/example.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import pyft
import tqdm
import sys

bam_f = "../tmp.bam"
fiberdata = pyft.FiberdataFetch(bam_f, "chr20", 0, 10_000_000)
for idx, fiber in enumerate(tqdm.tqdm(fiberdata)):
if idx < 10:
print(fiber, file=sys.stderr)

fiber.get_aligned_blocks_as_ranges()
fiberbam = pyft.Fiberbam(bam_f)
for fiber in tqdm.tqdm(fiberbam.fetch("chr20", 0, 10_000_000)):
# the number of ccs passes
fiber.ec
# the msp start positions
fiber.msp.starts
# print the nuc reference starts
fiber.nuc.reference_starts
# lift query (fiber) positions to reference positions
fiber.lift_query_positions([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# lift reference positions to query (fiber) positions
fiber.lift_reference_positions([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


# aligned blocks between query (fiber) and reference
fiber.get_aligned_blocks_as_ranges()
86 changes: 64 additions & 22 deletions py-ft/src/fiberdata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use fibertools_rs::extract::FiberseqData;
use pyo3::iter::IterNextOutput;
use pyo3::prelude::*;
use rust_htslib::{bam, bam::ext::BamRecordExtensions, bam::record::Aux, bam::Read};
use std::time;
use std::vec::IntoIter;

#[pyclass]
Expand Down Expand Up @@ -193,48 +194,89 @@ fn new_py_fiberdata(fiber: FiberseqData) -> Fiberdata {
/// Create a fibertools iterator from an indexed bam file.
/// Must provide a valid chrom, start, and end.
/// Returns an iterator over :class:`~pyft.Fiberdata` objects.
pub struct FiberdataFetch {
fiberdata: IntoIter<FiberseqData>,
//_inner: Rc<RefCell<bam::IndexedReader>>,
//fiberdata: Box<dyn Iterator<Item = FiberseqData> + Send + 'static>,
//fiberdata: Box<dyn Iterator<Item = FiberseqData> + Send + 'static>,
pub struct Fiberbam {
bam: bam::IndexedReader,
header: bam::Header,
start: time::Instant,
}

#[pymethods]
impl FiberdataFetch {
/// Open a fiberseq bam file. Must have an index.
impl Fiberbam {
#[new]
pub fn new(f: &str, chrom: &str, start: i64, end: i64) -> Self {
pub fn new(f: &str) -> Self {
let mut bam = bam::IndexedReader::from_path(f).expect("unable to open indexed bam file");
bam.set_threads(8).unwrap();
let header = bam::Header::from_template(bam.header());
let head_view = bam::HeaderView::from_header(&header);
bam.fetch((chrom, start, end))
let start = time::Instant::now();
Self { bam, header, start }
}

/// Returns an iterator over :class:`~pyft.Fiberdata` objects for the selected region.
pub fn fetch(&mut self, chrom: &str, start: i64, end: i64) -> Fiberiter {
let head_view = bam::HeaderView::from_header(&self.header);
self.bam
.fetch((chrom, start, end))
.expect("unable to fetch region");
let records: Vec<bam::Record> = bam.records().map(|r| r.unwrap()).collect();
log::info!("{} records fetched", records.len());
let fiberdata = FiberseqData::from_records(records, &head_view, 0).into_iter();
log::info!("fiberdata created from records");
/*let fiberdata = bam
.records()
.map(|rec| FiberseqData::new(&rec.unwrap(), None, 0))
.into_iter();*/
Self { fiberdata }
let records: Vec<bam::Record> = self.bam.records().map(|r| r.unwrap()).collect();

log::info!(
"{} records fetched in {:.2}s",
records.len(),
self.time_from_last()
);
let fiberdata = FiberseqData::from_records(records, &head_view, 0);
log::info!(
"Fiberdata made for {} records in {:.2}s",
fiberdata.len(),
self.time_from_last()
);
build_fiberdata_iter(fiberdata)
}

/// Return the seconds elapsed since the stored `start` instant and restart the timer.
///
/// Each call measures the time since the previous call, or since `new` if it
/// has not been called yet. `fetch` uses this to log per-stage timings.
fn time_from_last(&mut self) -> f64 {
    let since_last = self.start.elapsed();
    // Move the reference point forward so the next call only measures what follows.
    self.start = time::Instant::now();
    since_last.as_secs_f64()
}
}

/// Wrap a vector of fibers in a `Fiberiter`, recording how many fibers it holds.
fn build_fiberdata_iter(fiberdata: Vec<FiberseqData>) -> Fiberiter {
    // The count has to be taken before `into_iter` consumes the vector.
    let n_fibers = fiberdata.len();
    let fibers = fiberdata.into_iter();
    Fiberiter {
        fiberdata: fibers,
        length: n_fibers,
    }
}

#[pyclass]
/// An iterator over :class:`~pyft.Fiberdata` objects.
pub struct Fiberiter {
// Fibers not yet yielded; `__next__` takes one item from this per call.
fiberdata: IntoIter<FiberseqData>,
// Fiber count stored alongside the iterator; returned by `__len__` and `len`.
length: usize,
}

#[pymethods]
impl Fiberiter {
fn __next__(&mut self) -> IterNextOutput<Fiberdata, &'static str> {
let data = self.fiberdata.next();
match data {
Some(fiber) => IterNextOutput::Yield(new_py_fiberdata(fiber)),
None => {
log::info!("\n\nDone iterating over fibers");
IterNextOutput::Return("Ended")
}
None => IterNextOutput::Return("Ended"),
}
}

/// Return this same object as its own iterator.
fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
slf
}

/// Return the stored `length` field.
///
/// `__next__` does not modify `length`, so the value does not shrink as
/// fibers are consumed.
fn __len__(&self) -> usize {
self.length
}

/// Return the stored `length` field; same value as `__len__`.
fn len(&self) -> usize {
self.length
}
}

/// Class for describing base modifications within fiberseq data.
Expand Down
3 changes: 2 additions & 1 deletion py-ft/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ fn pyft(_py: Python, m: &PyModule) -> PyResult<()> {
.init();

m.add("__version__", env!("CARGO_PKG_VERSION"))?;
m.add_class::<fiberdata::FiberdataFetch>()?;
m.add_class::<fiberdata::Fiberbam>()?;
m.add_class::<fiberdata::Fiberiter>()?;
m.add_class::<fiberdata::Fiberdata>()?;
m.add_class::<fiberdata::Basemods>()?;
m.add_class::<fiberdata::Ranges>()?;
Expand Down

0 comments on commit 9da4fa3

Please sign in to comment.