diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py index 055384f..faf1ea9 100644 --- a/src/cool_seq_tool/mappers/exon_genomic_coords.py +++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py @@ -720,179 +720,6 @@ def _get_vrs_seq_loc( end=genomic_pos if not use_start else None, ), None - async def _genomic_to_tx_segment_old( - self, - genomic_pos: int, - chromosome: str | None = None, - genomic_ac: str | None = None, - transcript: str | None = None, - gene: str | None = None, - is_seg_start: bool = True, - coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE, - ) -> GenomicTxSeg: - """Given genomic data, generate a boundary for a transcript segment. - - Will liftover to GRCh38 assembly. If liftover is unsuccessful, will return - errors. - - :param genomic_pos: Genomic position where the transcript segment starts or ends - :param chromosome: Chromosome. Must give chromosome without a prefix - (i.e. ``1`` or ``X``). If not provided, must provide ``genomic_ac``. If - position maps to both GRCh37 and GRCh38, GRCh38 assembly will be used. - If ``genomic_ac`` is also provided, ``genomic_ac`` will be used. - :param genomic_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided, - must provide ``chromosome. If ``chromosome`` is also provided, ``genomic_ac`` - will be used. - :param transcript: The transcript to use. If this is not given, we will try the - following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining - Compatible Transcript - :param gene: Valid, case-sensitive HGNC gene symbol - :param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment starts. - ``False`` if ``genomic_pos`` is where the transcript segment ends. - :param coordinate_type: Coordinate type for ``seg_start_genomic`` and - ``seg_end_genomic``. Expects inter-residue coordinates by default - :return: Data for a transcript segment boundary (inter-residue coordinates) - """ - params = {key: None for key in GenomicTxSeg.model_fields} - - if not gene and not transcript: - return GenomicTxSeg( - errors=[ - "`gene` or `transcript` must be provided to select the adjacent transcript junction" - ] - ) - - if not genomic_ac: - for assembly in [Assembly.GRCH38.value, Assembly.GRCH37.value]: - _genomic_acs, err_msg = self.seqrepo_access.translate_identifier( - f"{assembly}:chr{chromosome}", "refseq" - ) - if err_msg: - return GenomicTxSeg(errors=[err_msg]) - genomic_ac = _genomic_acs[0].split(":")[-1] - - # Validate gene symbol exists - if gene: - valid_gene = await self.uta_db.validate_gene_symbol(gene=gene) - if not valid_gene: - return GenomicTxSeg(errors=[f"{gene} does not exist in UTA"]) - - # Always liftover to GRCh38 - genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos( - genomic_ac, genomic_pos - ) - if err_msg: - return GenomicTxSeg(errors=[err_msg]) - - # Validate coordinate is plausible - coordinate_check = await self._validate_gene_coordinates( - pos=genomic_pos, genomic_ac=genomic_ac, gene=gene - ) - if not coordinate_check: - return GenomicTxSeg( - errors=[ - f"{genomic_pos} on {genomic_ac} does not occur within the exons for {gene}" - ] - ) - - if not transcript: - # Select a transcript if not provided - mane_transcripts = self.mane_transcript_mappings.get_gene_mane_data(gene) - - if mane_transcripts: - transcript = mane_transcripts[0]["RefSeq_nuc"] - else: - # Attempt to find a coding transcript if a MANE transcript - # cannot be found - results = await self.uta_db.get_transcripts( - gene=gene, alt_ac=genomic_ac - ) - - if not results.is_empty(): - transcript = results[0]["tx_ac"][0] - else: - # Run if gene is for a noncoding transcript - query = f""" - SELECT DISTINCT tx_ac - FROM {self.uta_db.schema}.tx_exon_aln_v - WHERE hgnc = '{gene}' - AND alt_ac = '{genomic_ac}' - """ # noqa: S608 - result = await self.uta_db.execute_query(query) - - if result: - transcript = result[0]["tx_ac"] - else: - return GenomicTxSeg( - errors=[ - f"Could not find a transcript for {gene} on {genomic_ac}" - ] - ) - - tx_exons = await self._get_all_exon_coords( - tx_ac=transcript, genomic_ac=genomic_ac - ) - if not tx_exons: - return GenomicTxSeg(errors=[f"No exons found given {transcript}"]) - - strand = Strand(tx_exons[0].alt_strand) - params["strand"] = strand - use_alt_start_i = self._use_alt_start_i( - is_seg_start=is_seg_start, strand=strand - ) - if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE: - genomic_pos = genomic_pos - 1 # Convert residue coordinate to inter-residue - - # gene is not required to liftover coordinates if tx_ac and genomic_ac are given, but we should set the associated gene - if not gene: - _gene, err_msg = await self._get_tx_ac_gene(transcript) - if err_msg: - return GenomicTxSeg(errors=[err_msg]) - gene = _gene - - # Check if breakpoint occurs on an exon. - # If not, determine the adjacent exon given the selected transcript - if not self._is_exonic_breakpoint(genomic_pos, tx_exons): - exon_num = self._get_adjacent_exon( - tx_exons_genomic_coords=tx_exons, - strand=strand, - start=genomic_pos if is_seg_start else None, - end=genomic_pos if not is_seg_start else None, - ) - - offset = self._get_exon_offset( - genomic_pos=genomic_pos, - exon_boundary=tx_exons[exon_num].alt_start_i - if use_alt_start_i - else tx_exons[exon_num].alt_end_i, - strand=strand, - ) - - genomic_location, err_msg = self._get_vrs_seq_loc( - genomic_ac, genomic_pos, is_seg_start, strand - ) - if err_msg: - return GenomicTxSeg(errors=[err_msg]) - - return GenomicTxSeg( - gene=gene, - genomic_ac=genomic_ac, - tx_ac=transcript, - seg=TxSegment( - exon_ord=exon_num, - offset=offset, - genomic_location=genomic_location, - ), - ) - - return await self._get_tx_seg_genomic_metadata( - genomic_ac, - genomic_pos, - is_seg_start, - gene, - tx_ac=transcript, - ) - async def _genomic_to_tx_segment( self, genomic_pos: int, @@ -1232,13 +1059,6 @@ async def _get_tx_seg_genomic_metadata( tx_ac = mane_data["RefSeq_nuc"] grch38_ac = mane_data["GRCh38_chr"] - # Always liftover to GRCh38 - genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos( - genomic_ac, genomic_pos, grch38_ac=grch38_ac - ) - if err_msg: - return GenomicTxSeg(errors=[err_msg]) - tx_exons = await self._get_all_exon_coords(tx_ac, genomic_ac=grch38_ac) if not tx_exons: return GenomicTxSeg(errors=[f"No exons found given {tx_ac}"]) diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py index b783b4e..a77158f 100644 --- a/tests/mappers/test_exon_genomic_coords.py +++ b/tests/mappers/test_exon_genomic_coords.py @@ -1437,9 +1437,9 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic): resp = await test_egc_mapper.genomic_to_tx_segment(**inputs) assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end)) - # inputs = {"gene": "WEE1", "chromosome": "11", "seg_end_genomic": 9609996} - # resp = await test_egc_mapper.genomic_to_tx_segment(**inputs) - # assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end)) + inputs = {"gene": "WEE1", "chromosome": "11", "seg_end_genomic": 9588449} + resp = await test_egc_mapper.genomic_to_tx_segment(**inputs) + assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end)) inputs = {"transcript": "NM_003390.3", "exon_start": 2} resp = await test_egc_mapper.tx_segment_to_genomic(**inputs)