Skip to content

Commit

Permalink
Update assembly parameter, add chromosome as an optional parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
jarbesfeld committed Jan 6, 2025
1 parent 232673d commit 1c03cac
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 23 deletions.
38 changes: 22 additions & 16 deletions src/cool_seq_tool/mappers/exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ async def genomic_to_tx_segment(
transcript: str | None = None,
gene: str | None = None,
coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
assembly: Assembly = Assembly.GRCH38,
starting_assembly: Assembly = Assembly.GRCH38,
) -> GenomicTxSegService:
"""Get transcript segment data for genomic data, lifted over to GRCh38.
Expand Down Expand Up @@ -453,8 +453,8 @@ async def genomic_to_tx_segment(
value is provided.
:param coordinate_type: Coordinate type for ``seg_start_genomic`` and
``seg_end_genomic``. Expects inter-residue coordinates by default
:param assembly: The assembly that the supplied coordinate comes from. Set to
GRCh38 be default
:param starting_assembly: The assembly that the supplied coordinate comes from. Set to
GRCh38 by default
:return: Genomic data (inter-residue coordinates)
"""
errors = []
Expand All @@ -480,7 +480,7 @@ async def genomic_to_tx_segment(
gene=gene,
is_seg_start=True,
coordinate_type=coordinate_type,
assembly=assembly,
starting_assembly=starting_assembly,
)
if start_tx_seg_data.errors:
return _return_service_errors(start_tx_seg_data.errors)
Expand All @@ -501,7 +501,7 @@ async def genomic_to_tx_segment(
gene=gene,
is_seg_start=False,
coordinate_type=coordinate_type,
assembly=assembly,
starting_assembly=starting_assembly,
)
if end_tx_seg_data.errors:
return _return_service_errors(end_tx_seg_data.errors)
Expand Down Expand Up @@ -732,7 +732,7 @@ async def _genomic_to_tx_segment(
gene: str | None = None,
is_seg_start: bool = True,
coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
assembly: Assembly = Assembly.GRCH38,
starting_assembly: Assembly = Assembly.GRCH38,
) -> GenomicTxSeg:
"""Given genomic data, generate a boundary for a transcript segment.
Expand All @@ -755,8 +755,8 @@ async def _genomic_to_tx_segment(
``False`` if ``genomic_pos`` is where the transcript segment ends.
:param coordinate_type: Coordinate type for ``seg_start_genomic`` and
``seg_end_genomic``. Expects inter-residue coordinates by default
:param assembly: The assembly that the supplied coordinate comes from. Set to
GRCh38 be default
:param starting_assembly: The assembly that the supplied coordinate comes from. Set to
GRCh38 by default
:return: Data for a transcript segment boundary (inter-residue coordinates)
"""
params = {key: None for key in GenomicTxSeg.model_fields}
Expand All @@ -779,7 +779,7 @@ async def _genomic_to_tx_segment(
genomic_ac_validation = await self.uta_db.validate_genomic_ac(genomic_ac)
if not genomic_ac_validation:
return GenomicTxSeg(errors=[f"{genomic_ac} does not exist in UTA"])
if assembly == Assembly.GRCH37:
if starting_assembly == Assembly.GRCH37:
grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
genomic_ac = grch38_ac[0]
else:
Expand All @@ -792,8 +792,10 @@ async def _genomic_to_tx_segment(
genomic_ac = genomic_acs[0]

# Liftover to GRCh38 if the provided assembly is GRCh37
if assembly == Assembly.GRCH37:
genomic_pos = await self._get_grch38_pos(genomic_pos, genomic_ac)
if starting_assembly == Assembly.GRCH37:
genomic_pos = await self._get_grch38_pos(
genomic_pos, genomic_ac, chromosome=chromosome if chromosome else None
)
if not genomic_pos:
return GenomicTxSeg(
errors=[
Expand Down Expand Up @@ -910,17 +912,21 @@ async def _genomic_to_tx_segment(
tx_ac=transcript,
)

async def _get_grch38_pos(self, genomic_pos: int, genomic_ac: str) -> int | None:
async def _get_grch38_pos(
self, genomic_pos: int, genomic_ac: str, chromosome: str | None = None
) -> int | None:
"""Liftover a GRCh37 coordinate to GRCh38
:param genomic_pos: A genomic coordinate in GRCh37
:param genomic_ac: The genomic accession in GRCh38
:param chromosome: The chromosome that genomic_pos occurs on
:return The genomic coordinate in GRCh38
"""
chromosome, _ = self.seqrepo_access.translate_identifier(
genomic_ac, target_namespaces=Assembly.GRCH38.value
)
chromosome = chromosome[-1].split(":")[-1]
if not chromosome:
chromosome, _ = self.seqrepo_access.translate_identifier(
genomic_ac, target_namespaces=Assembly.GRCH38.value
)
chromosome = chromosome[-1].split(":")[-1]
liftover_data = self.liftover.get_liftover(
chromosome, genomic_pos, Assembly.GRCH38
)
Expand Down
19 changes: 12 additions & 7 deletions tests/mappers/test_exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,11 @@ async def test_get_grch38_pos(test_egc_mapper):
genomic_pos = await test_egc_mapper._get_grch38_pos(9609996, "NC_000011.10")
assert genomic_pos == 9588449

genomic_pos = await test_egc_mapper._get_grch38_pos(
9609996, "NC_000011.10", "chr11"
)
assert genomic_pos == 9588449

genomic_pos = await test_egc_mapper._get_grch38_pos(
9609996999999999, "NC_000011.10"
)
Expand Down Expand Up @@ -1236,7 +1241,7 @@ async def test_braf(test_egc_mapper, mane_braf):
"seg_start_genomic": 140501359, # GRCh38 coords: 140801559
"seg_end_genomic": 140453136, # GRCh38 coords: 140753336
"gene": "BRAF",
"assembly": Assembly.GRCH37.value,
"starting_assembly": Assembly.GRCH37.value,
}
# MANE
g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
Expand All @@ -1260,7 +1265,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
"seg_start_genomic": 9597639,
"seg_end_genomic": 9609996,
"transcript": "NM_003390.3",
"assembly": Assembly.GRCH37.value,
"starting_assembly": Assembly.GRCH37.value,
}
g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
genomic_tx_seg_service_checks(g_to_t_resp, wee1_exon2_exon11)
Expand All @@ -1277,7 +1282,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
"seg_end_genomic": 9609996,
"transcript": "NM_003390.3",
"gene": "WEE1",
"assembly": Assembly.GRCH37.value,
"starting_assembly": Assembly.GRCH37.value,
}
g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
genomic_tx_seg_service_checks(g_to_t_resp, wee1_exon2_exon11)
Expand All @@ -1293,7 +1298,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
"seg_start_genomic": 9597639, # GRCh38 coords: 9576092
"seg_end_genomic": 9609996, # GRCh38 coords: 9588449
"gene": "WEE1",
"assembly": Assembly.GRCH37.value,
"starting_assembly": Assembly.GRCH37.value,
}
g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
genomic_tx_seg_service_checks(g_to_t_resp, mane_wee1_exon2_exon11)
Expand Down Expand Up @@ -1405,7 +1410,7 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic):
"gene": "WEE1",
"genomic_ac": "NC_000011.9",
"seg_end_genomic": 9609996,
"assembly": Assembly.GRCH37.value,
"starting_assembly": Assembly.GRCH37.value,
}
resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
Expand All @@ -1414,7 +1419,7 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic):
"gene": "WEE1",
"chromosome": "11",
"seg_end_genomic": 9609996,
"assembly": Assembly.GRCH37.value,
"starting_assembly": Assembly.GRCH37.value,
}
resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
Expand Down Expand Up @@ -1449,7 +1454,7 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic):
seg_start_genomic=73442503,
seg_end_genomic=73457929, # not on an exon
gene="ELN",
assembly=Assembly.GRCH37.value,
starting_assembly=Assembly.GRCH37.value,
)
genomic_tx_seg_service_checks(resp, eln_grch38_intronic)

Expand Down

0 comments on commit 1c03cac

Please sign in to comment.