Skip to content

Commit

Permalink
Merge branch 'pod5' into 'master'
Browse files (browse the repository at this point in the history)
POD5

See merge request machine-learning/bonito!77
  • Loading branch information
iiSeymour committed May 19, 2022
2 parents cc14da8 + e14a9d7 commit c0f83f3
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 24 deletions.
34 changes: 17 additions & 17 deletions bonito/mkr.py → bonito/pod5.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Bonito MKR Utils
Bonito POD5 Utils
"""

from glob import glob
Expand All @@ -10,7 +10,7 @@
import numpy as np
import bonito.reader
from tqdm import tqdm
from mkr_format import open_combined_file
from pod5_format import open_combined_file


class Read(bonito.reader.Read):
Expand Down Expand Up @@ -72,34 +72,34 @@ def __init__(self, read, filename, meta=False):
self.signal = bonito.reader.norm_by_noisiest_section(scaled)


def mkr_reads(mkr_file, read_ids, skip=False):
def pod5_reads(pod5_file, read_ids, skip=False):
"""
Get all the reads from the `mkr_file`.
Get all the reads from the `pod5_file`.
"""
if read_ids is None:
yield from open_combined_file(mkr_file).reads()
yield from open_combined_file(pod5_file).reads()
elif skip:
for read in open_combined_file(mkr_file).reads():
for read in open_combined_file(pod5_file).reads():
if str(read.read_id) not in read_ids:
yield read
else:
yield from open_combined_file(mkr_file).select_reads({UUID(rid) for rid in read_ids}, missing_ok=True)
yield from open_combined_file(pod5_file).select_reads({UUID(rid) for rid in read_ids}, missing_ok=True)


def get_read_groups(directory, model, read_ids=None, skip=False, n_proc=1, recursive=False, cancel=None):
"""
Get all the read meta data for a given `directory`.
"""
groups = set()
pattern = "**/*.mkr" if recursive else "*.mkr"
mkr_files = (Path(x) for x in glob(directory + "/" + pattern, recursive=True))
pattern = "**/*.pod5" if recursive else "*.pod5"
pod5_files = (Path(x) for x in glob(directory + "/" + pattern, recursive=True))

for mkr_file in mkr_files:
for pod5_file in pod5_files:
for read in tqdm(
mkr_reads(mkr_file, read_ids, skip),
pod5_reads(pod5_file, read_ids, skip),
leave=False, desc="> preprocessing reads", unit=" reads/s", ascii=True, ncols=100
):
read = Read(read, mkr_file, meta=True)
read = Read(read, pod5_file, meta=True)
groups.add(read.readgroup(model))
return groups

Expand All @@ -108,11 +108,11 @@ def get_reads(directory, read_ids=None, skip=False, n_proc=1, recursive=False, c
"""
Get all reads in a given `directory`.
"""
pattern = "**/*.mkr" if recursive else "*.mkr"
mkr_files = (Path(x) for x in glob(directory + "/" + pattern, recursive=True))
pattern = "**/*.pod5" if recursive else "*.pod5"
pod5_files = (Path(x) for x in glob(directory + "/" + pattern, recursive=True))

for mkr_file in mkr_files:
for read in mkr_reads(mkr_file, read_ids, skip):
yield Read(read, mkr_file)
for pod5_file in pod5_files:
for read in pod5_reads(pod5_file, read_ids, skip):
yield Read(read, pod5_file)
if cancel is not None and cancel.is_set():
return
2 changes: 1 addition & 1 deletion bonito/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from scipy.signal import find_peaks


__formats__ = ["fast5", "mkr"]
__formats__ = ["fast5", "pod5"]


class Reader:
Expand Down
4 changes: 2 additions & 2 deletions requirements-cuda111.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ ont-koi==0.0.9
onnxruntime==1.11.0
ont-remora==1.1.0
ont-fast5-api==3.3.0
mkr-format==0.0.11
mkr-format-tools==0.0.11
pod5-format==0.0.14
pod5-format-tools==0.0.14
fast-ctc-decode==0.3.2
python-dateutil==2.8.2
# cuda requirements
Expand Down
4 changes: 2 additions & 2 deletions requirements-cuda113.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ ont-koi==0.0.9
onnxruntime==1.11.0
ont-remora==1.1.0
ont-fast5-api==3.3.0
mkr-format==0.0.11
mkr-format-tools==0.0.11
pod5-format==0.0.14
pod5-format-tools==0.0.14
fast-ctc-decode==0.3.2
python-dateutil==2.8.2
# cuda requirements
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ ont-koi==0.0.9
onnxruntime==1.11.0
ont-remora==1.1.0
ont-fast5-api==3.3.0
mkr-format==0.0.11
mkr-format-tools==0.0.11
pod5-format==0.0.14
pod5-format-tools==0.0.14
fast-ctc-decode==0.3.2
python-dateutil==2.8.2
# cuda requirements
Expand Down

0 comments on commit c0f83f3

Please sign in to comment.