From 88bf925cbc0ae80457ecb45b275b358ee2284846 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Fri, 8 Dec 2023 14:58:27 -0500 Subject: [PATCH] more work --- .../sources/mane_transcript_mappings.py | 5 ++++- docs/TranscriptSelectionPriority.md | 22 ------------------- docs/source/index.rst | 1 + docs/source/reference/index.rst | 2 ++ docs/source/transcript_selection.rst | 20 +++++++++++++++++ docs/source/usage.rst | 7 ++++-- 6 files changed, 32 insertions(+), 25 deletions(-) delete mode 100644 docs/TranscriptSelectionPriority.md create mode 100644 docs/source/transcript_selection.rst diff --git a/cool_seq_tool/sources/mane_transcript_mappings.py b/cool_seq_tool/sources/mane_transcript_mappings.py index 94366020..7617e682 100644 --- a/cool_seq_tool/sources/mane_transcript_mappings.py +++ b/cool_seq_tool/sources/mane_transcript_mappings.py @@ -1,4 +1,6 @@ -"""Provide fast tabular access to MANE summary file.""" +"""Provide fast tabular access to MANE summary file. Enables retrieval of associated +MANE transcripts for gene symbols, genomic positions, or transcript accessions. +""" import logging from pathlib import Path from typing import Dict, List @@ -30,6 +32,7 @@ def __init__(self, mane_data_path: Path = MANE_SUMMARY_PATH) -> None: def _load_mane_transcript_data(self) -> pl.DataFrame: """Load RefSeq MANE data file into DataFrame. + :return: DataFrame containing RefSeq MANE Transcript data """ return pl.read_csv(self.mane_data_path, separator="\t") diff --git a/docs/TranscriptSelectionPriority.md b/docs/TranscriptSelectionPriority.md deleted file mode 100644 index 72ef9944..00000000 --- a/docs/TranscriptSelectionPriority.md +++ /dev/null @@ -1,22 +0,0 @@ -# Transcript Selection Policy - -This document contains information on the Transcript Selection Policy. We use this policy for selecting a representative transcript using sequence attributes and MANE annotations from the `mane_transcript` module. 
- -More information on MANE can be found [here](https://www.ncbi.nlm.nih.gov/refseq/MANE/). - -## Representative transcript priority -We evaluate all compatible transcripts against each of the below criteria, and select the transcript which meets the earliest criterion as representative. - -1. Transcript is annotated as a MANE Select transcript -2. Transcript is annotated as a MANE Plus Clinical transcript -3. Longest Compatible Remaining\ - a. If there is a tie, choose the first-published transcript (lowest-numbered accession for RefSeq/Ensembl) among those transcripts meeting this criterion\ - _Note: We want the most recent version of a transcript associated with an assembly_ - -## Compatible Transcripts - -Compatible transcripts are those that pass validation checks. The checks that we make are: - - - Validating the position exists on an accession - - Validating reference sequences - - Validating exon structure diff --git a/docs/source/index.rst b/docs/source/index.rst index a89f0251..7f0ffd9c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -16,6 +16,7 @@ Description here. Installation Usage + Transcript Selection API Reference Contributing License diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst index 0b641b22..490b7d67 100644 --- a/docs/source/reference/index.rst +++ b/docs/source/reference/index.rst @@ -1,3 +1,5 @@ +.. _api_reference: + API Reference ============= diff --git a/docs/source/transcript_selection.rst b/docs/source/transcript_selection.rst new file mode 100644 index 00000000..1f951b56 --- /dev/null +++ b/docs/source/transcript_selection.rst @@ -0,0 +1,20 @@ +Transcript Selection +==================== + +One of the core uses of Cool-Seq-Tool is to acquire and use consensus, representative transcripts in performing genomic analysis. 
Here, we describe the selection processes, programmed in the :py:class:`MANETranscript <cool_seq_tool.mappers.mane_transcript.MANETranscript>` class, for choosing the best available transcripts that are compatible with requested data. + +We rely heavily on transcripts annotated under the `Matched Annotation from NCBI and EMBL-EBI (MANE)` Transcripts project. For more information on the MANE project, see the `NCBI MANE page <https://www.ncbi.nlm.nih.gov/refseq/MANE/>`_. + +Representative transcript priority +---------------------------------- + +All compatible transcripts are evaluated and ordered against the below criteria. The candidate transcript which meets the earliest criterion is chosen as representative. + +#. Transcript is annotated as a `MANE Select` transcript +#. Transcript is annotated as a `MANE Plus Clinical` transcript +#. Transcript is the longest-compatible remaining transcript +#. Transcript is the first-published (lowest-numbered RefSeq/Ensembl accession) remaining transcript + +.. note:: + + We prefer the most recent version of a transcript associated with an assembly. diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 28e1c41a..e9de0096 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -16,13 +16,14 @@ A core :py:class:`CoolSeqTool <cool_seq_tool.app.CoolSeqTool>` class encapsulate >>> from cool_seq_tool.app import CoolSeqTool >>> cst = CoolSeqTool() -.. _configuration: +Descriptions and examples of functions can be found in the :ref:`API Reference <api_reference>` section. REST server ----------- -Possibly staged for deletion? +TODO Possibly staged for deletion? +.. _configuration: Environment configuration ------------------------- @@ -52,3 +53,5 @@ Individual classes will accept arguments upon initialization to set parameters r Schema support -------------- + +Many genomic data objects produced by Cool-Seq-Tool are structured in conformance with the `Variation Representation Specification <https://vrs.ga4gh.org/en/stable/>`_, courtesy of the `VRS-Python <https://github.com/ga4gh/vrs-python>`_ library.