diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py index d8a2afe..0e4a842 100644 --- a/src/cool_seq_tool/mappers/exon_genomic_coords.py +++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py @@ -413,7 +413,7 @@ async def genomic_to_tx_segment( transcript: str | None = None, gene: str | None = None, coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE, - assembly: Assembly = Assembly.GRCH38, + starting_assembly: Assembly = Assembly.GRCH38, ) -> GenomicTxSegService: """Get transcript segment data for genomic data, lifted over to GRCh38. @@ -453,8 +453,8 @@ async def genomic_to_tx_segment( value is provided. :param coordinate_type: Coordinate type for ``seg_start_genomic`` and ``seg_end_genomic``. Expects inter-residue coordinates by default - :param assembly: The assembly that the supplied coordinate comes from. Set to - GRCh38 be default + :param starting_assembly: The assembly that the supplied coordinate comes from. Set to + GRCh38 by default :return: Genomic data (inter-residue coordinates) """ errors = [] @@ -480,7 +480,7 @@ async def genomic_to_tx_segment( gene=gene, is_seg_start=True, coordinate_type=coordinate_type, - assembly=assembly, + starting_assembly=starting_assembly, ) if start_tx_seg_data.errors: return _return_service_errors(start_tx_seg_data.errors) @@ -501,7 +501,7 @@ async def genomic_to_tx_segment( gene=gene, is_seg_start=False, coordinate_type=coordinate_type, - assembly=assembly, + starting_assembly=starting_assembly, ) if end_tx_seg_data.errors: return _return_service_errors(end_tx_seg_data.errors) @@ -732,7 +732,7 @@ async def _genomic_to_tx_segment( gene: str | None = None, is_seg_start: bool = True, coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE, - assembly: Assembly = Assembly.GRCH38, + starting_assembly: Assembly = Assembly.GRCH38, ) -> GenomicTxSeg: """Given genomic data, generate a boundary for a transcript segment. @@ -755,8 +755,8 @@ async def _genomic_to_tx_segment( ``False`` if ``genomic_pos`` is where the transcript segment ends. :param coordinate_type: Coordinate type for ``seg_start_genomic`` and ``seg_end_genomic``. Expects inter-residue coordinates by default - :param assembly: The assembly that the supplied coordinate comes from. Set to - GRCh38 be default + :param starting_assembly: The assembly that the supplied coordinate comes from. Set to + GRCh38 by default :return: Data for a transcript segment boundary (inter-residue coordinates) """ params = {key: None for key in GenomicTxSeg.model_fields} @@ -779,7 +779,7 @@ async def _genomic_to_tx_segment( genomic_ac_validation = await self.uta_db.validate_genomic_ac(genomic_ac) if not genomic_ac_validation: return GenomicTxSeg(errors=[f"{genomic_ac} does not exist in UTA"]) - if assembly == Assembly.GRCH37: + if starting_assembly == Assembly.GRCH37: grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac) genomic_ac = grch38_ac[0] else: @@ -792,8 +792,10 @@ async def _genomic_to_tx_segment( genomic_ac = genomic_acs[0] # Liftover to GRCh38 if the provided assembly is GRCh37 - if assembly == Assembly.GRCH37: - genomic_pos = await self._get_grch38_pos(genomic_pos, genomic_ac) + if starting_assembly == Assembly.GRCH37: + genomic_pos = await self._get_grch38_pos( + genomic_pos, genomic_ac, chromosome=chromosome if chromosome else None + ) if not genomic_pos: return GenomicTxSeg( errors=[ @@ -910,17 +912,21 @@ async def _genomic_to_tx_segment( tx_ac=transcript, ) - async def _get_grch38_pos(self, genomic_pos: int, genomic_ac: str) -> int | None: + async def _get_grch38_pos( + self, genomic_pos: int, genomic_ac: str, chromosome: str | None = None + ) -> int | None: """Liftover a GRCh37 coordinate to GRCh38 :param genomic_pos: A genomic coordinate in GRCh37 :param genomic_ac: The genomic accession in GRCh38 + :param chromosome: The chromosome that genomic_pos occurs on :return The genomic coordinate in GRCh38 """ - chromosome, _ = self.seqrepo_access.translate_identifier( - genomic_ac, target_namespaces=Assembly.GRCH38.value - ) - chromosome = chromosome[-1].split(":")[-1] + if not chromosome: + chromosome, _ = self.seqrepo_access.translate_identifier( + genomic_ac, target_namespaces=Assembly.GRCH38.value + ) + chromosome = chromosome[-1].split(":")[-1] liftover_data = self.liftover.get_liftover( chromosome, genomic_pos, Assembly.GRCH38 ) diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py index d259818..4566068 100644 --- a/tests/mappers/test_exon_genomic_coords.py +++ b/tests/mappers/test_exon_genomic_coords.py @@ -710,6 +710,11 @@ async def test_get_grch38_pos(test_egc_mapper): genomic_pos = await test_egc_mapper._get_grch38_pos(9609996, "NC_000011.10") assert genomic_pos == 9588449 + genomic_pos = await test_egc_mapper._get_grch38_pos( + 9609996, "NC_000011.10", "chr11" + ) + assert genomic_pos == 9588449 + genomic_pos = await test_egc_mapper._get_grch38_pos( 9609996999999999, "NC_000011.10" ) @@ -1236,7 +1241,7 @@ async def test_braf(test_egc_mapper, mane_braf): "seg_start_genomic": 140501359, # GRCh38 coords: 140801559 "seg_end_genomic": 140453136, # GRCh38 coords: 140753336 "gene": "BRAF", - "assembly": Assembly.GRCH37.value, + "starting_assembly": Assembly.GRCH37.value, } # MANE g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs) @@ -1260,7 +1265,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11): "seg_start_genomic": 9597639, "seg_end_genomic": 9609996, "transcript": "NM_003390.3", - "assembly": Assembly.GRCH37.value, + "starting_assembly": Assembly.GRCH37.value, } g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs) genomic_tx_seg_service_checks(g_to_t_resp, wee1_exon2_exon11) @@ -1277,7 +1282,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11): "seg_end_genomic": 9609996, "transcript": "NM_003390.3", "gene": "WEE1", - "assembly": Assembly.GRCH37.value, + "starting_assembly": Assembly.GRCH37.value, } g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs) genomic_tx_seg_service_checks(g_to_t_resp, wee1_exon2_exon11) @@ -1293,7 +1298,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11): "seg_start_genomic": 9597639, # GRCh38 coords: 9576092 "seg_end_genomic": 9609996, # GRCh38 coords: 9588449 "gene": "WEE1", - "assembly": Assembly.GRCH37.value, + "starting_assembly": Assembly.GRCH37.value, } g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs) genomic_tx_seg_service_checks(g_to_t_resp, mane_wee1_exon2_exon11) @@ -1405,7 +1410,7 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic): "gene": "WEE1", "genomic_ac": "NC_000011.9", "seg_end_genomic": 9609996, - "assembly": Assembly.GRCH37.value, + "starting_assembly": Assembly.GRCH37.value, } resp = await test_egc_mapper.genomic_to_tx_segment(**inputs) assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end)) @@ -1414,7 +1419,7 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic): "gene": "WEE1", "chromosome": "11", "seg_end_genomic": 9609996, - "assembly": Assembly.GRCH37.value, + "starting_assembly": Assembly.GRCH37.value, } resp = await test_egc_mapper.genomic_to_tx_segment(**inputs) assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end)) @@ -1449,7 +1454,7 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic): seg_start_genomic=73442503, seg_end_genomic=73457929, # not on an exon gene="ELN", - assembly=Assembly.GRCH37.value, + starting_assembly=Assembly.GRCH37.value, ) genomic_tx_seg_service_checks(resp, eln_grch38_intronic)