Skip to content

Commit

Permalink
feat: read from stdin and write to stdout
Browse files Browse the repository at this point in the history
chore: type hints

fix: check for paths and remove default file_type for stdout

doc: add docs

Update fgpyo/sam/__init__.py

Co-authored-by: Nils Homer <[email protected]>

Update fgpyo/sam/__init__.py

Co-authored-by: Nils Homer <[email protected]>

Update fgpyo/sam/__init__.py

Co-authored-by: Nils Homer <[email protected]>

Update fgpyo/sam/__init__.py

Co-authored-by: Nils Homer <[email protected]>

Update fgpyo/sam/__init__.py

Co-authored-by: Nils Homer <[email protected]>

Update fgpyo/sam/__init__.py

Co-authored-by: Nils Homer <[email protected]>

chore: line wrap docstrings
  • Loading branch information
msto committed Jun 6, 2024
1 parent f4d5444 commit b608e14
Showing 1 changed file with 30 additions and 4 deletions.
34 changes: 30 additions & 4 deletions fgpyo/sam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@

import enum
import io
import sys
from pathlib import Path
from typing import IO
from typing import Any
Expand Down Expand Up @@ -189,6 +190,12 @@
_IOClasses = (io.TextIOBase, io.BufferedIOBase, io.RawIOBase, io.IOBase)
"""The classes that should be treated as file-like classes"""

_STDIN_PATHS: List[str] = ["-", "stdin", "/dev/stdin"]
"""Paths that should be opened as stdin."""

_STDOUT_PATHS: List[str] = ["-", "stdout", "/dev/stdout"]
"""Paths that should be opened as stdout."""


@enum.unique
class SamFileType(enum.Enum):
Expand Down Expand Up @@ -230,6 +237,12 @@ def _pysam_open(
) -> SamFile:
"""Opens a SAM/BAM/CRAM for reading or writing.
This function permits reading from standard input and writing to standard output. The specified
path may be the UNIX conventional `"-"`, the more explicit `"stdin"` or `"stdout"`, or an
absolute path to either of the standard streams `"/dev/stdin"` or `"/dev/stdout"`.
When writing to standard output, the file type must be specified.
Args:
path: a file handle or path to the SAM/BAM/CRAM to read or write.
open_for_reading: True to open for reading, False to open for writing.
Expand All @@ -241,8 +254,14 @@ def _pysam_open(
"""

if isinstance(path, (str, Path)): # type: ignore
file_type = file_type or SamFileType.from_path(path)
path = str(path)
if str(path) in _STDIN_PATHS and open_for_reading:
path = sys.stdin
elif str(path) in _STDOUT_PATHS and not open_for_reading:
assert file_type is not None, "Must specify file_type when writing to standard output"
path = sys.stdout
else:
file_type = file_type or SamFileType.from_path(path)
path = str(path)
elif not isinstance(path, _IOClasses): # type: ignore
open_type = "reading" if open_for_reading else "writing"
raise TypeError(f"Cannot open '{type(path)}' for {open_type}.")
Expand Down Expand Up @@ -274,6 +293,9 @@ def reader(
) -> SamFile:
"""Opens a SAM/BAM/CRAM for reading.
To read from standard input, provide any of `"-"`, `"stdin"`, or `"/dev/stdin"` as the input
`path`.
Args:
path: a file handle or path to the SAM/BAM/CRAM to read.
file_type: the file type to assume when opening the file. If None, then the file
Expand All @@ -290,15 +312,19 @@ def writer(
) -> SamFile:
"""Opens a SAM/BAM/CRAM for writing.
To write to standard output, provide any of `"-"`, `"stdout"`, or `"/dev/stdout"` as the output
`path`. **Note**: When writing to `stdout`, the `file_type` _must_ be given.
Args:
path: a file handle or path to the SAM/BAM/CRAM to write.
header: Either a string to use for the header or a multi-level dictionary. The
multi-level dictionary should be given as follows. The first level are the four
types (‘HD’, ‘SQ’, ...). The second level are a list of lines, with each line being
a list of tag-value pairs. The header is constructed first from all the defined
fields, followed by user tags in alphabetical order.
file_type: the file type to assume when opening the file. If None, then the
filetype will be auto-detected and must be a path-like object.
file_type: the file type to assume when opening the file. If `None`, then the file type
will be auto-detected from `path`, which must then be a path-like object. This argument is
required when writing to standard output.
"""
# Set the header for pysam's AlignmentFile
key = "text" if isinstance(header, str) else "header"
Expand Down

0 comments on commit b608e14

Please sign in to comment.