From 2f36fdbd68495a8698cc3f87ea121467da67ca8b Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Wed, 4 Dec 2024 09:53:10 -0500
Subject: [PATCH 01/16] Add work to support residue coords + refactor offset
 calculation

---
 .../mappers/exon_genomic_coords.py            | 112 ++++++++------
 tests/mappers/test_exon_genomic_coords.py     | 139 ++++++++++++++++--
 2 files changed, 196 insertions(+), 55 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index 82d4a72..2717012 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -10,6 +10,7 @@
 from cool_seq_tool.schemas import (
     Assembly,
     BaseModelForbidExtra,
+    CoordinateType,
     ServiceMeta,
     Strand,
 )
@@ -412,6 +413,7 @@ async def genomic_to_tx_segment(
         transcript: str | None = None,
         get_nearest_transcript_junction: bool = False,
         gene: str | None = None,
+        coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
     ) -> GenomicTxSegService:
         """Get transcript segment data for genomic data, lifted over to GRCh38.
 
@@ -485,6 +487,7 @@ async def genomic_to_tx_segment(
                 gene=gene,
                 get_nearest_transcript_junction=get_nearest_transcript_junction,
                 is_seg_start=True,
+                coordinate_type=coordinate_type,
             )
             if start_tx_seg_data.errors:
                 return _return_service_errors(start_tx_seg_data.errors)
@@ -505,6 +508,7 @@ async def genomic_to_tx_segment(
                 gene=gene,
                 get_nearest_transcript_junction=get_nearest_transcript_junction,
                 is_seg_start=False,
+                coordinate_type=coordinate_type,
             )
             if end_tx_seg_data.errors:
                 return _return_service_errors(end_tx_seg_data.errors)
@@ -735,6 +739,7 @@ async def _genomic_to_tx_segment(
         gene: str | None = None,
         get_nearest_transcript_junction: bool = False,
         is_seg_start: bool = True,
+        coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
     ) -> GenomicTxSeg:
         """Given genomic data, generate a boundary for a transcript segment.
 
@@ -763,6 +768,8 @@ async def _genomic_to_tx_segment(
             preceding the breakpoint for the 3' end.
         :param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment starts.
             ``False`` if ``genomic_pos`` is where the transcript segment ends.
+        :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
+            ``seg_end_genomic``
         :return: Data for a transcript segment boundary (inter-residue coordinates)
         """
         params = {key: None for key in GenomicTxSeg.model_fields}
@@ -835,6 +842,13 @@ async def _genomic_to_tx_segment(
 
             strand = Strand(tx_exons[0].alt_strand)
             params["strand"] = strand
+            use_alt_start_i = self._use_alt_start_i(
+                is_seg_start=is_seg_start, strand=strand
+            )
+            if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
+                genomic_pos = (
+                    genomic_pos - 1
+                )  # Convert residue coordinate to inter-residue
 
             # Check if breakpoint occurs on an exon.
             # If not, determine the adjacent exon given the selected transcript
@@ -847,15 +861,11 @@ async def _genomic_to_tx_segment(
                 )
 
                 offset = self._get_exon_offset(
-                    start_i=tx_exons[exon_num].alt_start_i,
-                    end_i=tx_exons[exon_num].alt_end_i,
+                    genomic_pos=genomic_pos,
+                    exon_boundary=tx_exons[exon_num].alt_start_i
+                    if use_alt_start_i
+                    else tx_exons[exon_num].alt_end_i,
                     strand=strand,
-                    use_start_i=strand == Strand.POSITIVE
-                    if is_seg_start
-                    else strand != Strand.POSITIVE,
-                    is_in_exon=False,
-                    start=genomic_pos if is_seg_start else None,
-                    end=genomic_pos if not is_seg_start else None,
                 )
 
                 genomic_location, err_msg = self._get_vrs_seq_loc(
@@ -931,7 +941,12 @@ async def _genomic_to_tx_segment(
                 )
 
         return await self._get_tx_seg_genomic_metadata(
-            genomic_ac, genomic_pos, is_seg_start, gene, tx_ac=transcript
+            genomic_ac,
+            genomic_pos,
+            is_seg_start,
+            gene,
+            tx_ac=transcript,
+            coordinate_type=coordinate_type,
         )
 
     async def _get_grch38_ac_pos(
@@ -1049,6 +1064,7 @@ async def _get_tx_seg_genomic_metadata(
         is_seg_start: bool,
         gene: str,
         tx_ac: str | None,
+        coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
     ) -> GenomicTxSeg:
         """Get transcript segment data and associated genomic metadata.
 
@@ -1110,15 +1126,18 @@ async def _get_tx_seg_genomic_metadata(
             )
 
         tx_exon_aln_data = tx_exon_aln_data[0]
-
+        strand = Strand(tx_exon_aln_data.alt_strand)
+        use_alt_start_i = self._use_alt_start_i(
+            is_seg_start=is_seg_start, strand=strand
+        )
+        if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
+            genomic_pos = genomic_pos - 1  # Convert residue coordinate to inter-residue
         offset = self._get_exon_offset(
-            start_i=tx_exon_aln_data.alt_start_i,
-            end_i=tx_exon_aln_data.alt_end_i,
-            strand=Strand(tx_exon_aln_data.alt_strand),
-            use_start_i=False,  # This doesn't impact anything since we're on the exon
-            is_in_exon=True,
-            start=genomic_pos if is_seg_start else None,
-            end=genomic_pos if not is_seg_start else None,
+            genomic_pos=genomic_pos,
+            exon_boundary=tx_exon_aln_data.alt_start_i
+            if use_alt_start_i
+            else tx_exon_aln_data.alt_end_i,
+            strand=strand,
         )
 
         genomic_location, err_msg = self._get_vrs_seq_loc(
@@ -1150,6 +1169,24 @@ def _is_exonic_breakpoint(pos: int, tx_genomic_coords: list[_ExonCoord]) -> bool
             exon.alt_start_i <= pos <= exon.alt_end_i for exon in tx_genomic_coords
         )
 
+    @staticmethod
+    def _use_alt_start_i(is_seg_start: bool, strand: Strand) -> bool:
+        """Determine whether to use alt_start_i or alt_end_i from UTA when computing
+        exon offset
+
+        :param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment starts.
+            ``False`` if ``genomic_pos`` is where the transcript segment ends.
+        :param strand: The transcribed strand
+        :return ``True`` if alt_start_i should be used, ``False`` if alt_end_i should
+        be used
+        """
+        return bool(
+            is_seg_start
+            and strand == Strand.POSITIVE
+            or not is_seg_start
+            and strand == Strand.NEGATIVE
+        )
+
     @staticmethod
     def _get_adjacent_exon(
         tx_exons_genomic_coords: list[_ExonCoord],
@@ -1205,38 +1242,21 @@ def _get_adjacent_exon(
 
     @staticmethod
     def _get_exon_offset(
-        start_i: int,
-        end_i: int,
+        genomic_pos: int,
+        exon_boundary: int,
         strand: Strand,
-        use_start_i: bool = True,
-        is_in_exon: bool = True,
-        start: int | None = None,
-        end: int | None = None,
     ) -> int:
         """Compute offset from exon start or end index
 
-        :param start_i: Exon start index (inter-residue)
-        :param end_i: Exon end index (inter-residue)
-        :param strand: Strand
-        :param use_start_i: Whether or not ``start_i`` should be used to compute the
-            offset, defaults to ``True``. This is only used when ``is_in_exon`` is
-            ``False``.
-        :param is_in_exon: Whether or not the position occurs in an exon, defaults to
-            ``True``
-        :param start: Provided start position, defaults to ``None``. Must provide
-            ``start`` or ``end``, not both.
-        :param end: Provided end position, defaults to ``None``. Must provide ``start``
-            or ``end``, not both
+        :param genomic_pos: The supplied genomic position. This can represent, for
+            example, a fusion junction breakpoint
+        :param exon_boundary: The genomic position for the exon boundary that the offset
+            is being computed against
+        :paran strand: The transcribed strand
         :return: Offset from exon start or end index
         """
-        if is_in_exon:
-            if start is not None:
-                offset = start - start_i if strand == Strand.POSITIVE else end_i - start
-            else:
-                offset = end - end_i if strand == Strand.POSITIVE else start_i - end
-        else:
-            if strand == Strand.POSITIVE:
-                offset = start - start_i if use_start_i else end - end_i
-            else:
-                offset = start_i - end if use_start_i else end_i - start
-        return offset
+        return (
+            genomic_pos - exon_boundary
+            if strand == Strand.POSITIVE
+            else (genomic_pos - exon_boundary) * -1
+        )
diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py
index c779953..4b61564 100644
--- a/tests/mappers/test_exon_genomic_coords.py
+++ b/tests/mappers/test_exon_genomic_coords.py
@@ -10,6 +10,7 @@
     _ExonCoord,
 )
 from cool_seq_tool.schemas import (
+    CoordinateType,
     Strand,
 )
 
@@ -880,6 +881,21 @@ def test_is_exonic_breakpoint(test_egc_mapper, nm_001105539_exons_genomic_coords
     assert resp is True  # Breakpoint does occur on an exon
 
 
+def test_use_alt_start_i(test_egc_mapper):
+    """Test when to use alt_start_i or alt_end_i from UTA"""
+    resp = test_egc_mapper._use_alt_start_i(is_seg_start=True, strand=Strand.POSITIVE)
+    assert resp
+
+    resp = test_egc_mapper._use_alt_start_i(is_seg_start=False, strand=Strand.NEGATIVE)
+    assert resp
+
+    resp = test_egc_mapper._use_alt_start_i(is_seg_start=True, strand=Strand.NEGATIVE)
+    assert not resp
+
+    resp = test_egc_mapper._use_alt_start_i(is_seg_start=False, strand=Strand.POSITIVE)
+    assert not resp
+
+
 @pytest.mark.asyncio()
 async def test_genomic_to_transcript_fusion_context(
     test_egc_mapper,
@@ -900,15 +916,6 @@ async def test_genomic_to_transcript_fusion_context(
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(resp, zbtb10_exon3_end)
 
-    inputs = {
-        "chromosome": "chr8",
-        "seg_end_genomic": 80514010,
-        "gene": "ZBTB10",
-        "get_nearest_transcript_junction": True,
-    }
-    resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
-    genomic_tx_seg_service_checks(resp, zbtb10_exon3_end)
-
     inputs = {
         "chromosome": "8",
         "seg_start_genomic": 80518580,
@@ -981,6 +988,67 @@ async def test_genomic_to_transcript_fusion_context(
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(resp, gusbp3_exon5_start)
 
+    # Test with residue coordinates
+    inputs = {
+        "chromosome": "8",
+        "seg_end_genomic": 80514010,
+        "gene": "ZBTB10",
+        "get_nearest_transcript_junction": True,
+        "coordinate_type": CoordinateType.RESIDUE,
+    }
+    resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
+    genomic_tx_seg_service_checks(resp, zbtb10_exon3_end)
+
+    inputs = {
+        "chromosome": "8",
+        "seg_start_genomic": 80518581,
+        "gene": "ZBTB10",
+        "get_nearest_transcript_junction": True,
+        "coordinate_type": CoordinateType.RESIDUE,
+    }
+    resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
+    genomic_tx_seg_service_checks(resp, zbtb10_exon5_start)
+
+    inputs = {
+        "chromosome": "1",
+        "seg_end_genomic": 154171411,
+        "gene": "TPM3",
+        "get_nearest_transcript_junction": True,
+        "coordinate_type": CoordinateType.RESIDUE,
+    }
+    resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
+    genomic_tx_seg_service_checks(resp, tpm3_exon6_end)
+
+    inputs = {
+        "chromosome": "1",
+        "seg_start_genomic": 154173080,
+        "gene": "TPM3",
+        "get_nearest_transcript_junction": True,
+        "coordinate_type": CoordinateType.RESIDUE,
+    }
+    resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
+    genomic_tx_seg_service_checks(resp, tpm3_exon5_start)
+
+    inputs = {
+        "chromosome": "5",
+        "seg_end_genomic": 69680765,
+        "gene": "GUSBP3",
+        "get_nearest_transcript_junction": True,
+        "coordinate_type": CoordinateType.RESIDUE,
+    }
+    resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
+    genomic_tx_seg_service_checks(resp, gusbp3_exon2_end)
+
+    inputs = {
+        "chromosome": "5",
+        "seg_start_genomic": 69645878,
+        "gene": "GUSBP3",
+        "get_nearest_transcript_junction": True,
+        "coordinate_type": CoordinateType.RESIDUE,
+    }
+    resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
+    genomic_tx_seg_service_checks(resp, gusbp3_exon5_start)
+
 
 @pytest.mark.asyncio()
 async def test_get_alt_ac_start_and_end(
@@ -1070,6 +1138,59 @@ async def test_genomic_to_transcript(test_egc_mapper, tpm3_exon1, tpm3_exon8):
     )
     genomic_tx_seg_checks(resp, tpm3_exon8)
 
+    # Test with residue coordinates
+    resp = await test_egc_mapper._genomic_to_tx_segment(
+        154192135,
+        genomic_ac="NC_000001.11",
+        transcript="NM_152263.3",
+        gene="TPM3",
+        coordinate_type=CoordinateType.RESIDUE,
+    )
+    genomic_tx_seg_checks(resp, tpm3_exon1)
+
+    resp = await test_egc_mapper._genomic_to_tx_segment(
+        154192135,
+        chromosome="1",
+        transcript="NM_152263.3",
+        coordinate_type=CoordinateType.RESIDUE,
+    )
+    genomic_tx_seg_checks(resp, tpm3_exon1)
+
+    resp = await test_egc_mapper._genomic_to_tx_segment(
+        154192135,
+        chromosome="1",
+        transcript="NM_152263.3",
+        coordinate_type=CoordinateType.RESIDUE,
+    )
+    genomic_tx_seg_checks(resp, tpm3_exon1)
+
+    resp = await test_egc_mapper._genomic_to_tx_segment(
+        154170400,
+        genomic_ac="NC_000001.11",
+        transcript="NM_152263.3",
+        is_seg_start=False,
+        coordinate_type=CoordinateType.RESIDUE,
+    )
+    genomic_tx_seg_checks(resp, tpm3_exon8)
+
+    resp = await test_egc_mapper._genomic_to_tx_segment(
+        154170400,
+        chromosome="1",
+        transcript="NM_152263.3",
+        is_seg_start=False,
+        coordinate_type=CoordinateType.RESIDUE,
+    )
+    genomic_tx_seg_checks(resp, tpm3_exon8)
+
+    resp = await test_egc_mapper._genomic_to_tx_segment(
+        154170400,
+        chromosome="1",
+        transcript="NM_152263.3",
+        is_seg_start=False,
+        coordinate_type=CoordinateType.RESIDUE,
+    )
+    genomic_tx_seg_checks(resp, tpm3_exon8)
+
 
 @pytest.mark.asyncio()
 async def test_tpm3(

From 49ef3dfbcc5a4ceef69f5fe83cf2aa137d63829d Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Mon, 9 Dec 2024 09:09:11 -0500
Subject: [PATCH 02/16] Fix docstrings

---
 src/cool_seq_tool/mappers/exon_genomic_coords.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index 2717012..683df69 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -461,7 +461,7 @@ async def genomic_to_tx_segment(
         :param gene: A valid, case-sensitive HGNC symbol. Must be given if no ``transcript``
             value is provided.
         :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
-            ``seg_end_genomic``
+            ``seg_end_genomic``. Expects inter-residue coordinates by default
         :return: Genomic data (inter-residue coordinates)
         """
         errors = []
@@ -747,7 +747,6 @@ async def _genomic_to_tx_segment(
         errors.
 
         :param genomic_pos: Genomic position where the transcript segment starts or ends
-            (inter-residue based)
         :param chromosome: Chromosome. Must give chromosome without a prefix
             (i.e. ``1`` or ``X``). If not provided, must provide ``genomic_ac``. If
             position maps to both GRCh37 and GRCh38, GRCh38 assembly will be used.
@@ -769,7 +768,7 @@ async def _genomic_to_tx_segment(
         :param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment starts.
             ``False`` if ``genomic_pos`` is where the transcript segment ends.
         :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
-            ``seg_end_genomic``
+            ``seg_end_genomic``. Expects inter-residue coordinates by default
         :return: Data for a transcript segment boundary (inter-residue coordinates)
         """
         params = {key: None for key in GenomicTxSeg.model_fields}
@@ -1180,7 +1179,7 @@ def _use_alt_start_i(is_seg_start: bool, strand: Strand) -> bool:
         :return ``True`` if alt_start_i should be used, ``False`` if alt_end_i should
         be used
         """
-        return bool(
+        return (
             is_seg_start
             and strand == Strand.POSITIVE
             or not is_seg_start
@@ -1249,7 +1248,8 @@ def _get_exon_offset(
         """Compute offset from exon start or end index
 
         :param genomic_pos: The supplied genomic position. This can represent, for
-            example, a fusion junction breakpoint
+            example, a fusion junction breakpoint. This position is represented using
+            inter-residue coordinates
         :param exon_boundary: The genomic position for the exon boundary that the offset
             is being computed against
         :paran strand: The transcribed strand

From 99beca087bc4b3833bbb5a191925dfcb1f979938 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 2 Jan 2025 11:32:49 -0500
Subject: [PATCH 03/16] Refactor genomic_to_tx_segment

---
 .../mappers/exon_genomic_coords.py            | 475 +++++++++++-------
 src/cool_seq_tool/sources/uta_database.py     |  32 ++
 tests/mappers/test_exon_genomic_coords.py     |  38 +-
 tests/sources/test_uta_database.py            |  20 +
 4 files changed, 362 insertions(+), 203 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index 41e982f..f12438b 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -411,7 +411,6 @@ async def genomic_to_tx_segment(
         seg_start_genomic: int | None = None,
         seg_end_genomic: int | None = None,
         transcript: str | None = None,
-        get_nearest_transcript_junction: bool = False,
         gene: str | None = None,
         coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
     ) -> GenomicTxSegService:
@@ -451,13 +450,6 @@ async def genomic_to_tx_segment(
             following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
             Compatible Transcript. See the :ref:`Transcript Selection policy <transcript_selection_policy>`
             page.
-        :param get_nearest_transcript_junction: If ``True``, this will return the
-            adjacent exon if the position specified by``seg_start_genomic`` or
-            ``seg_end_genomic`` does not occur on an exon. For the positive strand, adjacent
-            is defined as the exon preceding the breakpoint for the 5' end and the exon
-            following the breakpoint for the 3' end. For the negative strand, adjacent
-            is defined as the exon following the breakpoint for the 5' end and the exon
-            preceding the breakpoint for the 3' end.
         :param gene: A valid, case-sensitive HGNC symbol. Must be given if no ``transcript``
             value is provided.
         :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
@@ -485,7 +477,6 @@ async def genomic_to_tx_segment(
                 genomic_ac=genomic_ac,
                 transcript=transcript,
                 gene=gene,
-                get_nearest_transcript_junction=get_nearest_transcript_junction,
                 is_seg_start=True,
                 coordinate_type=coordinate_type,
             )
@@ -506,7 +497,6 @@ async def genomic_to_tx_segment(
                 genomic_ac=genomic_ac,
                 transcript=transcript,
                 gene=gene,
-                get_nearest_transcript_junction=get_nearest_transcript_junction,
                 is_seg_start=False,
                 coordinate_type=coordinate_type,
             )
@@ -730,14 +720,13 @@ def _get_vrs_seq_loc(
             end=genomic_pos if not use_start else None,
         ), None
 
-    async def _genomic_to_tx_segment(
+    async def _genomic_to_tx_segment_old(
         self,
         genomic_pos: int,
         chromosome: str | None = None,
         genomic_ac: str | None = None,
         transcript: str | None = None,
         gene: str | None = None,
-        get_nearest_transcript_junction: bool = False,
         is_seg_start: bool = True,
         coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
     ) -> GenomicTxSeg:
@@ -758,13 +747,6 @@ async def _genomic_to_tx_segment(
             following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
             Compatible Transcript
         :param gene: Valid, case-sensitive HGNC gene symbol
-        :param get_nearest_transcript_junction: If ``True``, this will return the
-            adjacent exon if the position specified by``seg_start_genomic`` or
-            ``seg_end_genomic`` does not occur on an exon. For the positive strand, adjacent
-            is defined as the exon preceding the breakpoint for the 5' end and the exon
-            following the breakpoint for the 3' end. For the negative strand, adjacent
-            is defined as the exon following the breakpoint for the 5' end and the exon
-            preceding the breakpoint for the 3' end.
         :param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment starts.
             ``False`` if ``genomic_pos`` is where the transcript segment ends.
         :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
@@ -773,171 +755,314 @@ async def _genomic_to_tx_segment(
         """
         params = {key: None for key in GenomicTxSeg.model_fields}
 
-        if get_nearest_transcript_junction:
-            if not gene and not transcript:
+        if not gene and not transcript:
+            return GenomicTxSeg(
+                errors=[
+                    "`gene` or `transcript` must be provided to select the adjacent transcript junction"
+                ]
+            )
+
+        if not genomic_ac:
+            genomic_acs, err_msg = self.seqrepo_access.chromosome_to_acs(chromosome)
+
+            if not genomic_acs:
                 return GenomicTxSeg(
-                    errors=[
-                        "`gene` or `transcript` must be provided to select the adjacent transcript junction"
-                    ]
+                    errors=[err_msg],
                 )
+            genomic_ac = genomic_acs[0]
 
-            if not genomic_ac:
-                genomic_acs, err_msg = self.seqrepo_access.chromosome_to_acs(chromosome)
+        # Validate gene symbol exists
+        if gene:
+            valid_gene = await self.uta_db.validate_gene_symbol(gene=gene)
+            if not valid_gene:
+                return GenomicTxSeg(errors=[f"{gene} does not exist in UTA"])
 
-                if not genomic_acs:
-                    return GenomicTxSeg(
-                        errors=[err_msg],
-                    )
-                genomic_ac = genomic_acs[0]
+        # Always liftover to GRCh38
+        genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos(
+            genomic_ac, genomic_pos
+        )
+        if err_msg:
+            return GenomicTxSeg(errors=[err_msg])
 
-            # Always liftover to GRCh38
-            genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos(
-                genomic_ac, genomic_pos
+        # Validate coordinate is plausible
+        coordinate_check = await self._validate_gene_coordinates(
+            pos=genomic_pos, genomic_ac=genomic_ac, gene=gene
+        )
+        if not coordinate_check:
+            return GenomicTxSeg(
+                errors=[
+                    f"{genomic_pos} on {genomic_ac} does not occur within the exons for {gene}"
+                ]
             )
-            if err_msg:
-                return GenomicTxSeg(errors=[err_msg])
 
-            if not transcript:
-                # Select a transcript if not provided
-                mane_transcripts = self.mane_transcript_mappings.get_gene_mane_data(
-                    gene
+        if not transcript:
+            # Select a transcript if not provided
+            mane_transcripts = self.mane_transcript_mappings.get_gene_mane_data(gene)
+
+            if mane_transcripts:
+                transcript = mane_transcripts[0]["RefSeq_nuc"]
+            else:
+                # Attempt to find a coding transcript if a MANE transcript
+                # cannot be found
+                results = await self.uta_db.get_transcripts(
+                    gene=gene, alt_ac=genomic_ac
                 )
 
-                if mane_transcripts:
-                    transcript = mane_transcripts[0]["RefSeq_nuc"]
+                if not results.is_empty():
+                    transcript = results[0]["tx_ac"][0]
                 else:
-                    # Attempt to find a coding transcript if a MANE transcript
-                    # cannot be found
-                    results = await self.uta_db.get_transcripts(
-                        gene=gene, alt_ac=genomic_ac
-                    )
-
-                    if not results.is_empty():
-                        transcript = results[0]["tx_ac"][0]
+                    # Run if gene is for a noncoding transcript
+                    query = f"""
+                        SELECT DISTINCT tx_ac
+                        FROM {self.uta_db.schema}.tx_exon_aln_v
+                        WHERE hgnc = '{gene}'
+                        AND alt_ac = '{genomic_ac}'
+                        """  # noqa: S608
+                    result = await self.uta_db.execute_query(query)
+
+                    if result:
+                        transcript = result[0]["tx_ac"]
                     else:
-                        # Run if gene is for a noncoding transcript
-                        query = f"""
-                            SELECT DISTINCT tx_ac
-                            FROM {self.uta_db.schema}.tx_exon_aln_v
-                            WHERE hgnc = '{gene}'
-                            AND alt_ac = '{genomic_ac}'
-                            """  # noqa: S608
-                        result = await self.uta_db.execute_query(query)
-
-                        if result:
-                            transcript = result[0]["tx_ac"]
-                        else:
-                            return GenomicTxSeg(
-                                errors=[
-                                    f"Could not find a transcript for {gene} on {genomic_ac}"
-                                ]
-                            )
-
-            tx_exons = await self._get_all_exon_coords(
-                tx_ac=transcript, genomic_ac=genomic_ac
+                        return GenomicTxSeg(
+                            errors=[
+                                f"Could not find a transcript for {gene} on {genomic_ac}"
+                            ]
+                        )
+
+        tx_exons = await self._get_all_exon_coords(
+            tx_ac=transcript, genomic_ac=genomic_ac
+        )
+        if not tx_exons:
+            return GenomicTxSeg(errors=[f"No exons found given {transcript}"])
+
+        strand = Strand(tx_exons[0].alt_strand)
+        params["strand"] = strand
+        use_alt_start_i = self._use_alt_start_i(
+            is_seg_start=is_seg_start, strand=strand
+        )
+        if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
+            genomic_pos = genomic_pos - 1  # Convert residue coordinate to inter-residue
+
+        # gene is not required to liftover coordinates if tx_ac and genomic_ac are given, but we should set the associated gene
+        if not gene:
+            _gene, err_msg = await self._get_tx_ac_gene(transcript)
+            if err_msg:
+                return GenomicTxSeg(errors=[err_msg])
+            gene = _gene
+
+        # Check if breakpoint occurs on an exon.
+        # If not, determine the adjacent exon given the selected transcript
+        if not self._is_exonic_breakpoint(genomic_pos, tx_exons):
+            exon_num = self._get_adjacent_exon(
+                tx_exons_genomic_coords=tx_exons,
+                strand=strand,
+                start=genomic_pos if is_seg_start else None,
+                end=genomic_pos if not is_seg_start else None,
             )
-            if not tx_exons:
-                return GenomicTxSeg(errors=[f"No exons found given {transcript}"])
 
-            strand = Strand(tx_exons[0].alt_strand)
-            params["strand"] = strand
-            use_alt_start_i = self._use_alt_start_i(
-                is_seg_start=is_seg_start, strand=strand
+            offset = self._get_exon_offset(
+                genomic_pos=genomic_pos,
+                exon_boundary=tx_exons[exon_num].alt_start_i
+                if use_alt_start_i
+                else tx_exons[exon_num].alt_end_i,
+                strand=strand,
             )
-            if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
-                genomic_pos = (
-                    genomic_pos - 1
-                )  # Convert residue coordinate to inter-residue
-
-            # Check if breakpoint occurs on an exon.
-            # If not, determine the adjacent exon given the selected transcript
-            if not self._is_exonic_breakpoint(genomic_pos, tx_exons):
-                exon_num = self._get_adjacent_exon(
-                    tx_exons_genomic_coords=tx_exons,
-                    strand=strand,
-                    start=genomic_pos if is_seg_start else None,
-                    end=genomic_pos if not is_seg_start else None,
-                )
 
-                offset = self._get_exon_offset(
-                    genomic_pos=genomic_pos,
-                    exon_boundary=tx_exons[exon_num].alt_start_i
-                    if use_alt_start_i
-                    else tx_exons[exon_num].alt_end_i,
-                    strand=strand,
-                )
+            genomic_location, err_msg = self._get_vrs_seq_loc(
+                genomic_ac, genomic_pos, is_seg_start, strand
+            )
+            if err_msg:
+                return GenomicTxSeg(errors=[err_msg])
 
-                genomic_location, err_msg = self._get_vrs_seq_loc(
-                    genomic_ac, genomic_pos, is_seg_start, strand
-                )
-                if err_msg:
-                    return GenomicTxSeg(errors=[err_msg])
+            return GenomicTxSeg(
+                gene=gene,
+                genomic_ac=genomic_ac,
+                tx_ac=transcript,
+                seg=TxSegment(
+                    exon_ord=exon_num,
+                    offset=offset,
+                    genomic_location=genomic_location,
+                ),
+            )
 
-                # gene is not required to liftover coordinates if tx_ac and genomic_ac are given, but we should set the associated gene
-                if not gene:
-                    _gene, err_msg = await self._get_tx_ac_gene(transcript)
-                    if err_msg:
-                        return GenomicTxSeg(errors=[err_msg])
-                    gene = _gene
+        return await self._get_tx_seg_genomic_metadata(
+            genomic_ac,
+            genomic_pos,
+            is_seg_start,
+            gene,
+            tx_ac=transcript,
+        )
 
-                return GenomicTxSeg(
-                    gene=gene,
-                    genomic_ac=genomic_ac,
-                    tx_ac=transcript,
-                    seg=TxSegment(
-                        exon_ord=exon_num,
-                        offset=offset,
-                        genomic_location=genomic_location,
-                    ),
-                )
+    async def _genomic_to_tx_segment(
+        self,
+        genomic_pos: int,
+        chromosome: str | None = None,
+        genomic_ac: str | None = None,
+        transcript: str | None = None,
+        gene: str | None = None,
+        is_seg_start: bool = True,
+        coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
+    ) -> GenomicTxSeg:
+        """Given genomic data, generate a boundary for a transcript segment.
+
+        Will liftover to GRCh38 assembly. If liftover is unsuccessful, will return
+        errors.
+
+        :param genomic_pos: Genomic position where the transcript segment starts or ends
+        :param chromosome: Chromosome. Must give chromosome without a prefix
+            (i.e. ``1`` or ``X``). If not provided, must provide ``genomic_ac``. If
+            position maps to both GRCh37 and GRCh38, GRCh38 assembly will be used.
+            If ``genomic_ac`` is also provided, ``genomic_ac`` will be used.
+        :param genomic_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
+            must provide ``chromosome. If ``chromosome`` is also provided, ``genomic_ac``
+            will be used.
+        :param transcript: The transcript to use. If this is not given, we will try the
+            following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
+            Compatible Transcript
+        :param gene: Valid, case-sensitive HGNC gene symbol
+        :param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment starts.
+            ``False`` if ``genomic_pos`` is where the transcript segment ends.
+        :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
+            ``seg_end_genomic``. Expects inter-residue coordinates by default
+        :return: Data for a transcript segment boundary (inter-residue coordinates)
+        """
+        params = {key: None for key in GenomicTxSeg.model_fields}
+
+        if not gene and not transcript:
+            return GenomicTxSeg(errors=["`gene` or `transcript` must be provided"])
+
+        # Validate inputs exist in UTA
+        if gene:
+            gene_validation = await self.uta_db.validate_gene_symbol(gene)
+            if not gene_validation:
+                return GenomicTxSeg(errors=[f"{gene} does not exist in UTA"])
+
+        if transcript:
+            transcript_validation = await self.uta_db.validate_transcript(transcript)
+            if not transcript_validation:
+                return GenomicTxSeg(errors=[f"{transcript} does not exist in UTA"])
 
         if genomic_ac:
-            _gene, err_msg = await self._get_genomic_ac_gene(genomic_pos, genomic_ac)
+            genomic_ac_validation = await self.uta_db.validate_genomic_ac(genomic_ac)
+            if not genomic_ac_validation:
+                return GenomicTxSeg(errors=[f"{genomic_ac} does not exist in UTA"])
 
-            if err_msg:
-                return GenomicTxSeg(errors=[err_msg])
+        if not genomic_ac:
+            genomic_acs, err_msg = self.seqrepo_access.chromosome_to_acs(chromosome)
 
-            if gene and _gene != gene:
+            if not genomic_acs:
                 return GenomicTxSeg(
-                    errors=[f"Expected gene, {gene}, but found {_gene}"]
+                    errors=[err_msg],
                 )
+            genomic_ac = genomic_acs[0]
 
-            gene = _gene
-        elif chromosome:
-            # Try GRCh38 first
-            for assembly in [Assembly.GRCH38.value, Assembly.GRCH37.value]:
-                _genomic_acs, err_msg = self.seqrepo_access.translate_identifier(
-                    f"{assembly}:chr{chromosome}", "refseq"
-                )
-                if err_msg:
-                    return GenomicTxSeg(errors=[err_msg])
-                _genomic_ac = _genomic_acs[0].split(":")[-1]
+        # Always liftover to GRCh38
+        genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos(
+            genomic_ac,
+            genomic_pos,
+        )
+        if err_msg:
+            return GenomicTxSeg(errors=[err_msg])
 
-                _gene, err_msg = await self._get_genomic_ac_gene(
-                    genomic_pos, _genomic_ac
+        # Select a transcript if not provided
+        if not transcript:
+            mane_transcripts = self.mane_transcript_mappings.get_gene_mane_data(gene)
+
+            if mane_transcripts:
+                transcript = mane_transcripts[0]["RefSeq_nuc"]
+            else:
+                # Attempt to find a coding transcript if a MANE transcript
+                # cannot be found
+                results = await self.uta_db.get_transcripts(
+                    gene=gene, alt_ac=genomic_ac
                 )
-                if _gene:
-                    if gene and _gene != gene:
+
+                if not results.is_empty():
+                    transcript = results[0]["tx_ac"][0]
+                else:
+                    # Run if gene is for a noncoding transcript
+                    query = f"""
+                        SELECT DISTINCT tx_ac
+                        FROM {self.uta_db.schema}.tx_exon_aln_v
+                        WHERE hgnc = '{gene}'
+                        AND alt_ac = '{genomic_ac}'
+                        """  # noqa: S608
+                    result = await self.uta_db.execute_query(query)
+
+                    if result:
+                        transcript = result[0]["tx_ac"]
+                    else:
                         return GenomicTxSeg(
-                            errors=[f"Expected gene, {gene}, but found {_gene}"]
+                            errors=[
+                                f"Could not find a transcript for {gene} on {genomic_ac}"
+                            ]
                         )
-                    gene = _gene
-                    genomic_ac = _genomic_ac
-                    break
+        tx_exons = await self._get_all_exon_coords(
+            tx_ac=transcript, genomic_ac=genomic_ac
+        )
+        if not tx_exons:
+            return GenomicTxSeg(errors=[f"No exons found given {transcript}"])
 
-            if not genomic_ac:
-                return GenomicTxSeg(
-                    errors=[
-                        f"Unable to get genomic RefSeq accession for chromosome {chromosome} on position {genomic_pos}"
-                    ]
-                )
+        strand = Strand(tx_exons[0].alt_strand)
+        params["strand"] = strand
+        use_alt_start_i = self._use_alt_start_i(
+            is_seg_start=is_seg_start, strand=strand
+        )
+        if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
+            genomic_pos = genomic_pos - 1  # Convert residue coordinate to inter-residue
 
-            if not gene:
-                return GenomicTxSeg(
-                    errors=[
-                        f"Unable to get gene given {genomic_ac} on position {genomic_pos}"
-                    ]
-                )
+        # gene is not required to liftover coordinates if tx_ac and genomic_ac are given, but we should set the associated gene
+        if not gene:
+            _gene, err_msg = await self._get_tx_ac_gene(transcript)
+            if err_msg:
+                return GenomicTxSeg(errors=[err_msg])
+            gene = _gene
+
+        # Validate that the breakpoint occurs on a transcript given a gene
+        coordinate_check = await self._validate_gene_coordinates(
+            pos=genomic_pos, genomic_ac=genomic_ac, gene=gene
+        )
+        if not coordinate_check:
+            return GenomicTxSeg(
+                errors=[
+                    f"{genomic_pos} on {genomic_ac} does not occur within the exons for {gene}"
+                ]
+            )
+
+        # Check if breakpoint occurs on an exon.
+        # If not, determine the adjacent exon given the selected transcript
+        if not self._is_exonic_breakpoint(genomic_pos, tx_exons):
+            exon_num = self._get_adjacent_exon(
+                tx_exons_genomic_coords=tx_exons,
+                strand=strand,
+                start=genomic_pos if is_seg_start else None,
+                end=genomic_pos if not is_seg_start else None,
+            )
+
+            offset = self._get_exon_offset(
+                genomic_pos=genomic_pos,
+                exon_boundary=tx_exons[exon_num].alt_start_i
+                if use_alt_start_i
+                else tx_exons[exon_num].alt_end_i,
+                strand=strand,
+            )
+
+            genomic_location, err_msg = self._get_vrs_seq_loc(
+                genomic_ac, genomic_pos, is_seg_start, strand
+            )
+            if err_msg:
+                return GenomicTxSeg(errors=[err_msg])
+
+            return GenomicTxSeg(
+                gene=gene,
+                genomic_ac=genomic_ac,
+                tx_ac=transcript,
+                seg=TxSegment(
+                    exon_ord=exon_num,
+                    offset=offset,
+                    genomic_location=genomic_location,
+                ),
+            )
 
         return await self._get_tx_seg_genomic_metadata(
             genomic_ac,
@@ -945,11 +1070,13 @@ async def _genomic_to_tx_segment(
             is_seg_start,
             gene,
             tx_ac=transcript,
-            coordinate_type=coordinate_type,
         )
 
     async def _get_grch38_ac_pos(
-        self, genomic_ac: str, genomic_pos: int, grch38_ac: str | None = None
+        self,
+        genomic_ac: str,
+        genomic_pos: int,
+        grch38_ac: str | None = None,
     ) -> tuple[str | None, int | None, str | None]:
         """Get GRCh38 genomic representation for accession and position
 
@@ -960,6 +1087,7 @@ async def _get_grch38_ac_pos(
         :return: Tuple containing GRCh38 accession, GRCh38 position, and error message
             if unable to get GRCh38 representation
         """
+        # Validate accession exists
         if not grch38_ac:
             grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
             if not grch38_ac:
@@ -983,7 +1111,6 @@ async def _get_grch38_ac_pos(
                     None,
                     f"`genomic_ac` must use {Assembly.GRCH37.value} or {Assembly.GRCH38.value} assembly.",
                 )
-
             chromosome = chromosome[-1].split(":")[-1]
             liftover_data = self.liftover.get_liftover(
                 chromosome, genomic_pos, Assembly.GRCH38
@@ -994,17 +1121,17 @@ async def _get_grch38_ac_pos(
                     None,
                     f"Lifting over {genomic_pos} on {genomic_ac} from {Assembly.GRCH37.value} to {Assembly.GRCH38.value} was unsuccessful.",
                 )
-
             genomic_pos = liftover_data[1]
             genomic_ac = grch38_ac
 
         return genomic_ac, genomic_pos, None
 
-    async def _get_genomic_ac_gene(
+    async def _validate_gene_coordinates(
         self,
         pos: int,
         genomic_ac: str,
-    ) -> tuple[str | None, str | None]:
+        gene: str,
+    ) -> bool:
         """Get gene given a genomic accession and position.
 
         If multiple genes are found for a given ``pos`` and ``genomic_ac``, only one
@@ -1012,23 +1139,30 @@ async def _get_genomic_ac_gene(
 
         :param pos: Genomic position on ``genomic_ac``
         :param genomic_ac: RefSeq genomic accession, e.g. ``"NC_000007.14"``
-        :return: HGNC gene symbol associated to genomic accession and position and
-            warning
+        :param gene: A gene symbol
+        :return: ``True`` if the coordinate falls within the first and last exon
+            for the gene, ``False`` if not
         """
         query = f"""
+            WITH tx_boundaries AS (
+                    SELECT
+                    tx_ac,
+                    hgnc,
+                    MIN(alt_start_i) as min_start,
+                    MAX(alt_end_i) as max_end
+                FROM {self.uta_db.schema}.tx_exon_aln_v
+                WHERE hgnc = '{gene}'
+                AND alt_ac = '{genomic_ac}'
+                GROUP BY tx_ac, hgnc
+            )
             SELECT DISTINCT hgnc
-            FROM {self.uta_db.schema}.tx_exon_aln_v
-            WHERE alt_ac = '{genomic_ac}'
-            AND alt_aln_method = 'splign'
-            AND {pos} BETWEEN alt_start_i AND alt_end_i
+            FROM tx_boundaries
+            WHERE {pos} between tx_boundaries.min_start and tx_boundaries.max_end
             ORDER BY hgnc
             LIMIT 1;
             """  # noqa: S608
         results = await self.uta_db.execute_query(query)
-        if not results:
-            return None, f"No gene(s) found given {genomic_ac} on position {pos}"
-
-        return results[0]["hgnc"], None
+        return bool(results)
 
     async def _get_tx_ac_gene(
         self,
@@ -1063,7 +1197,6 @@ async def _get_tx_seg_genomic_metadata(
         is_seg_start: bool,
         gene: str,
         tx_ac: str | None,
-        coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
     ) -> GenomicTxSeg:
         """Get transcript segment data and associated genomic metadata.
 
@@ -1129,8 +1262,6 @@ async def _get_tx_seg_genomic_metadata(
         use_alt_start_i = self._use_alt_start_i(
             is_seg_start=is_seg_start, strand=strand
         )
-        if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
-            genomic_pos = genomic_pos - 1  # Convert residue coordinate to inter-residue
         offset = self._get_exon_offset(
             genomic_pos=genomic_pos,
             exon_boundary=tx_exon_aln_data.alt_start_i
diff --git a/src/cool_seq_tool/sources/uta_database.py b/src/cool_seq_tool/sources/uta_database.py
index ee0c631..ab853bf 100644
--- a/src/cool_seq_tool/sources/uta_database.py
+++ b/src/cool_seq_tool/sources/uta_database.py
@@ -378,6 +378,38 @@ async def validate_genomic_ac(self, ac: str) -> bool:
         result = await self.execute_query(query)
         return result[0][0]
 
+    async def validate_gene_symbol(self, gene: str) -> bool:
+        """Return whether or not a gene symbol exists in UTA gene table
+
+        :param gene: Gene symbol
+        :return ``True`` if gene symbol exists in UTA, ``False`` if not
+        """
+        query = f"""
+            SELECT EXISTS(
+                SELECT hgnc
+                FROM {self.schema}.gene
+                WHERE hgnc = '{gene}'
+            );
+            """  # noqa: S608
+        result = await self.execute_query(query)
+        return result[0][0]
+
+    async def validate_transcript(self, transcript: str) -> bool:
+        """Return whether or not a transcript exists in the UTA tx_exon_aln_v table
+
+        :param transcript: A transcript accession
+        :return ``True`` if transcript exists in UTA, ``False`` if not
+        """
+        query = f"""
+            SELECT EXISTS(
+                SELECT tx_ac
+                FROM {self.schema}.tx_exon_aln_v
+                WHERE tx_ac = '{transcript}'
+            );
+            """  # noqa: S608
+        result = await self.execute_query(query)
+        return result[0][0]
+
     async def get_ac_descr(self, ac: str) -> str | None:
         """Return accession description. This is typically available only for accessions
         from older (pre-GRCh38) builds.
diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py
index 9a3522e..b783b4e 100644
--- a/tests/mappers/test_exon_genomic_coords.py
+++ b/tests/mappers/test_exon_genomic_coords.py
@@ -923,7 +923,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "8",
         "seg_end_genomic": 80514010,
         "gene": "ZBTB10",
-        "get_nearest_transcript_junction": True,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(resp, zbtb10_exon3_end)
@@ -932,7 +931,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "8",
         "seg_start_genomic": 80518580,
         "gene": "ZBTB10",
-        "get_nearest_transcript_junction": True,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(resp, zbtb10_exon5_start)
@@ -941,7 +939,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "1",
         "seg_end_genomic": 154171410,
         "gene": "TPM3",
-        "get_nearest_transcript_junction": True,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(resp, tpm3_exon6_end)
@@ -950,7 +947,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "1",
         "seg_start_genomic": 154173080,
         "gene": "TPM3",
-        "get_nearest_transcript_junction": True,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(resp, tpm3_exon5_start)
@@ -959,7 +955,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "5",
         "seg_end_genomic": 69680764,
         "gene": "GUSBP3",
-        "get_nearest_transcript_junction": True,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(resp, gusbp3_exon2_end)
@@ -968,7 +963,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "5",
         "seg_start_genomic": 69645878,
         "gene": "GUSBP3",
-        "get_nearest_transcript_junction": True,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(resp, gusbp3_exon5_start)
@@ -976,7 +970,6 @@ async def test_genomic_to_transcript_fusion_context(
     inputs = {  # Test when gene and transcript are not provided
         "chromosome": "5",
         "seg_start_genomic": 69645878,
-        "get_nearest_transcript_junction": True,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     assert resp.errors[0] == "Must provide either `gene` or `transcript`"
@@ -986,7 +979,6 @@ async def test_genomic_to_transcript_fusion_context(
         "seg_start_genomic": 69645878,
         "gene": "GUSBP3",
         "transcript": "NR_027386.2",
-        "get_nearest_transcript_junction": True,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(resp, gusbp3_exon5_start)
@@ -995,7 +987,6 @@ async def test_genomic_to_transcript_fusion_context(
         "genomic_ac": "NC_000005.10",
         "seg_start_genomic": 69645878,
         "transcript": "NR_027386.2",
-        "get_nearest_transcript_junction": True,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(resp, gusbp3_exon5_start)
@@ -1005,7 +996,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "8",
         "seg_end_genomic": 80514010,
         "gene": "ZBTB10",
-        "get_nearest_transcript_junction": True,
         "coordinate_type": CoordinateType.RESIDUE,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
@@ -1015,7 +1005,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "8",
         "seg_start_genomic": 80518581,
         "gene": "ZBTB10",
-        "get_nearest_transcript_junction": True,
         "coordinate_type": CoordinateType.RESIDUE,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
@@ -1025,7 +1014,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "1",
         "seg_end_genomic": 154171411,
         "gene": "TPM3",
-        "get_nearest_transcript_junction": True,
         "coordinate_type": CoordinateType.RESIDUE,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
@@ -1035,7 +1023,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "1",
         "seg_start_genomic": 154173080,
         "gene": "TPM3",
-        "get_nearest_transcript_junction": True,
         "coordinate_type": CoordinateType.RESIDUE,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
@@ -1045,7 +1032,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "5",
         "seg_end_genomic": 69680765,
         "gene": "GUSBP3",
-        "get_nearest_transcript_junction": True,
         "coordinate_type": CoordinateType.RESIDUE,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
@@ -1055,7 +1041,6 @@ async def test_genomic_to_transcript_fusion_context(
         "chromosome": "5",
         "seg_start_genomic": 69645878,
         "gene": "GUSBP3",
-        "get_nearest_transcript_junction": True,
         "coordinate_type": CoordinateType.RESIDUE,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
@@ -1452,9 +1437,9 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic):
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
 
-    inputs = {"gene": "WEE1", "chromosome": "11", "seg_end_genomic": 9609996}
-    resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
-    assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
+    # inputs = {"gene": "WEE1", "chromosome": "11", "seg_end_genomic": 9609996}
+    # resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
+    # assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
 
     inputs = {"transcript": "NM_003390.3", "exon_start": 2}
     resp = await test_egc_mapper.tx_segment_to_genomic(**inputs)
@@ -1486,7 +1471,6 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic):
         seg_start_genomic=73442503,
         seg_end_genomic=73457929,  # not on an exon
         gene="ELN",
-        get_nearest_transcript_junction=True,
     )
     genomic_tx_seg_service_checks(resp, eln_grch38_intronic)
 
@@ -1500,7 +1484,7 @@ async def test_invalid(test_egc_mapper):
         seg_end_genomic=154170399,
         genomic_ac="NC_000001.11",
     )
-    assert resp.errors == ["No exons found given NM_152263 3"]
+    assert resp.errors == ["NM_152263 3 does not exist in UTA"]
 
     # start and end not given
     resp = await test_egc_mapper.genomic_to_tx_segment(
@@ -1524,7 +1508,7 @@ async def test_invalid(test_egc_mapper):
         gene="dummy gene",
     )
     genomic_tx_seg_service_checks(resp, is_valid=False)
-    assert resp.errors == ["Expected gene, dummy gene, but found TPM3"]
+    assert resp.errors == ["dummy gene does not exist in UTA"]
 
     # Invalid accession
     resp = await test_egc_mapper.genomic_to_tx_segment(
@@ -1534,7 +1518,7 @@ async def test_invalid(test_egc_mapper):
         transcript="NM_152263.3",
     )
     genomic_tx_seg_service_checks(resp, is_valid=False)
-    assert resp.errors == ["No gene(s) found given NC_000001.200 on position 154191901"]
+    assert resp.errors == ["NC_000001.200 does not exist in UTA"]
 
     # Invalid coordinates
     resp = await test_egc_mapper.genomic_to_tx_segment(
@@ -1545,17 +1529,9 @@ async def test_invalid(test_egc_mapper):
     )
     genomic_tx_seg_service_checks(resp, is_valid=False)
     assert resp.errors == [
-        "No gene(s) found given NC_000001.11 on position 9999999999998"
+        "9999999999998 on NC_000001.11 does not occur within the exons for TPM3"
     ]
 
-    resp = await test_egc_mapper.genomic_to_tx_segment(
-        chromosome="1",
-        seg_start_genomic=154192135,
-        transcript="NM_002529.3",
-    )
-    genomic_tx_seg_service_checks(resp, is_valid=False)
-    assert resp.errors == ["Must find exactly one row for genomic data, but found: 0"]
-
     # Must supply either gene or transcript
     resp = await test_egc_mapper.genomic_to_tx_segment(
         seg_start_genomic=154191901, genomic_ac="NC_000001.11"
diff --git a/tests/sources/test_uta_database.py b/tests/sources/test_uta_database.py
index 4dccf21..6440d7d 100644
--- a/tests/sources/test_uta_database.py
+++ b/tests/sources/test_uta_database.py
@@ -87,6 +87,26 @@ async def test_validate_genomic_ac(test_db):
     assert resp is False
 
 
+@pytest.mark.asyncio()
+async def test_validate_gene_symbol(test_db):
+    """Test validate_gene_symbol"""
+    resp = await test_db.validate_gene_symbol("TPM3")
+    assert resp is True
+
+    resp = await test_db.validate_gene_symbol("dummy gene")
+    assert resp is False
+
+
+@pytest.mark.asyncio()
+async def test_validate_transcript(test_db):
+    """Tests validate_transcript"""
+    resp = await test_db.validate_transcript("NM_152263.3")
+    assert resp is True
+
+    resp = await test_db.validate_transcript("NM_152263 3")
+    assert resp is False
+
+
 @pytest.mark.asyncio()
 async def test_get_ac_descr(test_db):
     """Test that get_ac_descr works correctly."""

From fc541514e090025fe4b241467257867aad956fb0 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 2 Jan 2025 13:29:45 -0500
Subject: [PATCH 04/16] Add translate_identifier

---
 src/cool_seq_tool/mappers/exon_genomic_coords.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index f12438b..055384f 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -763,13 +763,13 @@ async def _genomic_to_tx_segment_old(
             )
 
         if not genomic_ac:
-            genomic_acs, err_msg = self.seqrepo_access.chromosome_to_acs(chromosome)
-
-            if not genomic_acs:
-                return GenomicTxSeg(
-                    errors=[err_msg],
+            for assembly in [Assembly.GRCH38.value, Assembly.GRCH37.value]:
+                _genomic_acs, err_msg = self.seqrepo_access.translate_identifier(
+                    f"{assembly}:chr{chromosome}", "refseq"
                 )
-            genomic_ac = genomic_acs[0]
+                if err_msg:
+                    return GenomicTxSeg(errors=[err_msg])
+            genomic_ac = _genomic_acs[0].split(":")[-1]
 
         # Validate gene symbol exists
         if gene:

From 677492702c2810b4c781d5b650a14606d8d70914 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 2 Jan 2025 14:19:18 -0500
Subject: [PATCH 05/16] Fix liftover issues

---
 .../mappers/exon_genomic_coords.py            | 180 ------------------
 tests/mappers/test_exon_genomic_coords.py     |   6 +-
 2 files changed, 3 insertions(+), 183 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index 055384f..faf1ea9 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -720,179 +720,6 @@ def _get_vrs_seq_loc(
             end=genomic_pos if not use_start else None,
         ), None
 
-    async def _genomic_to_tx_segment_old(
-        self,
-        genomic_pos: int,
-        chromosome: str | None = None,
-        genomic_ac: str | None = None,
-        transcript: str | None = None,
-        gene: str | None = None,
-        is_seg_start: bool = True,
-        coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
-    ) -> GenomicTxSeg:
-        """Given genomic data, generate a boundary for a transcript segment.
-
-        Will liftover to GRCh38 assembly. If liftover is unsuccessful, will return
-        errors.
-
-        :param genomic_pos: Genomic position where the transcript segment starts or ends
-        :param chromosome: Chromosome. Must give chromosome without a prefix
-            (i.e. ``1`` or ``X``). If not provided, must provide ``genomic_ac``. If
-            position maps to both GRCh37 and GRCh38, GRCh38 assembly will be used.
-            If ``genomic_ac`` is also provided, ``genomic_ac`` will be used.
-        :param genomic_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
-            must provide ``chromosome. If ``chromosome`` is also provided, ``genomic_ac``
-            will be used.
-        :param transcript: The transcript to use. If this is not given, we will try the
-            following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
-            Compatible Transcript
-        :param gene: Valid, case-sensitive HGNC gene symbol
-        :param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment starts.
-            ``False`` if ``genomic_pos`` is where the transcript segment ends.
-        :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
-            ``seg_end_genomic``. Expects inter-residue coordinates by default
-        :return: Data for a transcript segment boundary (inter-residue coordinates)
-        """
-        params = {key: None for key in GenomicTxSeg.model_fields}
-
-        if not gene and not transcript:
-            return GenomicTxSeg(
-                errors=[
-                    "`gene` or `transcript` must be provided to select the adjacent transcript junction"
-                ]
-            )
-
-        if not genomic_ac:
-            for assembly in [Assembly.GRCH38.value, Assembly.GRCH37.value]:
-                _genomic_acs, err_msg = self.seqrepo_access.translate_identifier(
-                    f"{assembly}:chr{chromosome}", "refseq"
-                )
-                if err_msg:
-                    return GenomicTxSeg(errors=[err_msg])
-            genomic_ac = _genomic_acs[0].split(":")[-1]
-
-        # Validate gene symbol exists
-        if gene:
-            valid_gene = await self.uta_db.validate_gene_symbol(gene=gene)
-            if not valid_gene:
-                return GenomicTxSeg(errors=[f"{gene} does not exist in UTA"])
-
-        # Always liftover to GRCh38
-        genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos(
-            genomic_ac, genomic_pos
-        )
-        if err_msg:
-            return GenomicTxSeg(errors=[err_msg])
-
-        # Validate coordinate is plausible
-        coordinate_check = await self._validate_gene_coordinates(
-            pos=genomic_pos, genomic_ac=genomic_ac, gene=gene
-        )
-        if not coordinate_check:
-            return GenomicTxSeg(
-                errors=[
-                    f"{genomic_pos} on {genomic_ac} does not occur within the exons for {gene}"
-                ]
-            )
-
-        if not transcript:
-            # Select a transcript if not provided
-            mane_transcripts = self.mane_transcript_mappings.get_gene_mane_data(gene)
-
-            if mane_transcripts:
-                transcript = mane_transcripts[0]["RefSeq_nuc"]
-            else:
-                # Attempt to find a coding transcript if a MANE transcript
-                # cannot be found
-                results = await self.uta_db.get_transcripts(
-                    gene=gene, alt_ac=genomic_ac
-                )
-
-                if not results.is_empty():
-                    transcript = results[0]["tx_ac"][0]
-                else:
-                    # Run if gene is for a noncoding transcript
-                    query = f"""
-                        SELECT DISTINCT tx_ac
-                        FROM {self.uta_db.schema}.tx_exon_aln_v
-                        WHERE hgnc = '{gene}'
-                        AND alt_ac = '{genomic_ac}'
-                        """  # noqa: S608
-                    result = await self.uta_db.execute_query(query)
-
-                    if result:
-                        transcript = result[0]["tx_ac"]
-                    else:
-                        return GenomicTxSeg(
-                            errors=[
-                                f"Could not find a transcript for {gene} on {genomic_ac}"
-                            ]
-                        )
-
-        tx_exons = await self._get_all_exon_coords(
-            tx_ac=transcript, genomic_ac=genomic_ac
-        )
-        if not tx_exons:
-            return GenomicTxSeg(errors=[f"No exons found given {transcript}"])
-
-        strand = Strand(tx_exons[0].alt_strand)
-        params["strand"] = strand
-        use_alt_start_i = self._use_alt_start_i(
-            is_seg_start=is_seg_start, strand=strand
-        )
-        if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
-            genomic_pos = genomic_pos - 1  # Convert residue coordinate to inter-residue
-
-        # gene is not required to liftover coordinates if tx_ac and genomic_ac are given, but we should set the associated gene
-        if not gene:
-            _gene, err_msg = await self._get_tx_ac_gene(transcript)
-            if err_msg:
-                return GenomicTxSeg(errors=[err_msg])
-            gene = _gene
-
-        # Check if breakpoint occurs on an exon.
-        # If not, determine the adjacent exon given the selected transcript
-        if not self._is_exonic_breakpoint(genomic_pos, tx_exons):
-            exon_num = self._get_adjacent_exon(
-                tx_exons_genomic_coords=tx_exons,
-                strand=strand,
-                start=genomic_pos if is_seg_start else None,
-                end=genomic_pos if not is_seg_start else None,
-            )
-
-            offset = self._get_exon_offset(
-                genomic_pos=genomic_pos,
-                exon_boundary=tx_exons[exon_num].alt_start_i
-                if use_alt_start_i
-                else tx_exons[exon_num].alt_end_i,
-                strand=strand,
-            )
-
-            genomic_location, err_msg = self._get_vrs_seq_loc(
-                genomic_ac, genomic_pos, is_seg_start, strand
-            )
-            if err_msg:
-                return GenomicTxSeg(errors=[err_msg])
-
-            return GenomicTxSeg(
-                gene=gene,
-                genomic_ac=genomic_ac,
-                tx_ac=transcript,
-                seg=TxSegment(
-                    exon_ord=exon_num,
-                    offset=offset,
-                    genomic_location=genomic_location,
-                ),
-            )
-
-        return await self._get_tx_seg_genomic_metadata(
-            genomic_ac,
-            genomic_pos,
-            is_seg_start,
-            gene,
-            tx_ac=transcript,
-        )
-
     async def _genomic_to_tx_segment(
         self,
         genomic_pos: int,
@@ -1232,13 +1059,6 @@ async def _get_tx_seg_genomic_metadata(
             tx_ac = mane_data["RefSeq_nuc"]
             grch38_ac = mane_data["GRCh38_chr"]
 
-        # Always liftover to GRCh38
-        genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos(
-            genomic_ac, genomic_pos, grch38_ac=grch38_ac
-        )
-        if err_msg:
-            return GenomicTxSeg(errors=[err_msg])
-
         tx_exons = await self._get_all_exon_coords(tx_ac, genomic_ac=grch38_ac)
         if not tx_exons:
             return GenomicTxSeg(errors=[f"No exons found given {tx_ac}"])
diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py
index b783b4e..a77158f 100644
--- a/tests/mappers/test_exon_genomic_coords.py
+++ b/tests/mappers/test_exon_genomic_coords.py
@@ -1437,9 +1437,9 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic):
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
 
-    # inputs = {"gene": "WEE1", "chromosome": "11", "seg_end_genomic": 9609996}
-    # resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
-    # assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
+    inputs = {"gene": "WEE1", "chromosome": "11", "seg_end_genomic": 9588449}
+    resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
+    assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
 
     inputs = {"transcript": "NM_003390.3", "exon_start": 2}
     resp = await test_egc_mapper.tx_segment_to_genomic(**inputs)

From 9bc2c31ff434f5fed5ef859f732190c40f9cf8cb Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Fri, 3 Jan 2025 11:06:52 -0500
Subject: [PATCH 06/16] Store requested changes

---
 src/cool_seq_tool/mappers/exon_genomic_coords.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index faf1ea9..549166f 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -418,8 +418,6 @@ async def genomic_to_tx_segment(
 
         If liftover to GRCh38 is unsuccessful, will return errors.
 
-        Must provide inter-residue coordinates.
-
         MANE Transcript data will be returned if and only if ``transcript`` is not
         supplied. ``gene`` must be given in order to retrieve MANE Transcript data.
 
@@ -773,8 +771,7 @@ async def _genomic_to_tx_segment(
             genomic_ac_validation = await self.uta_db.validate_genomic_ac(genomic_ac)
             if not genomic_ac_validation:
                 return GenomicTxSeg(errors=[f"{genomic_ac} does not exist in UTA"])
-
-        if not genomic_ac:
+        else:
             genomic_acs, err_msg = self.seqrepo_access.chromosome_to_acs(chromosome)
 
             if not genomic_acs:

From 3a7c91459e7cdf5f01ce2a9a14ee2dbeb47f0f40 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Fri, 3 Jan 2025 12:58:41 -0500
Subject: [PATCH 07/16] Add support for providing assembly

---
 .../mappers/exon_genomic_coords.py            | 95 +++++++------------
 tests/mappers/test_exon_genomic_coords.py     | 61 ++++--------
 2 files changed, 56 insertions(+), 100 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index 549166f..32725d9 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -413,6 +413,7 @@ async def genomic_to_tx_segment(
         transcript: str | None = None,
         gene: str | None = None,
         coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
+        assembly: Assembly = Assembly.GRCH38,
     ) -> GenomicTxSegService:
         """Get transcript segment data for genomic data, lifted over to GRCh38.
 
@@ -452,6 +453,8 @@ async def genomic_to_tx_segment(
             value is provided.
         :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
             ``seg_end_genomic``. Expects inter-residue coordinates by default
+        :param assembly: The assembly that the supplied coordinate comes from. Set to
+            GRCh38 be default
         :return: Genomic data (inter-residue coordinates)
         """
         errors = []
@@ -477,6 +480,7 @@ async def genomic_to_tx_segment(
                 gene=gene,
                 is_seg_start=True,
                 coordinate_type=coordinate_type,
+                assembly=assembly,
             )
             if start_tx_seg_data.errors:
                 return _return_service_errors(start_tx_seg_data.errors)
@@ -497,6 +501,7 @@ async def genomic_to_tx_segment(
                 gene=gene,
                 is_seg_start=False,
                 coordinate_type=coordinate_type,
+                assembly=assembly,
             )
             if end_tx_seg_data.errors:
                 return _return_service_errors(end_tx_seg_data.errors)
@@ -727,6 +732,7 @@ async def _genomic_to_tx_segment(
         gene: str | None = None,
         is_seg_start: bool = True,
         coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
+        assembly: Assembly = Assembly.GRCH38,
     ) -> GenomicTxSeg:
         """Given genomic data, generate a boundary for a transcript segment.
 
@@ -749,6 +755,8 @@ async def _genomic_to_tx_segment(
             ``False`` if ``genomic_pos`` is where the transcript segment ends.
         :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
             ``seg_end_genomic``. Expects inter-residue coordinates by default
+        :param assembly: The assembly that the supplied coordinate comes from. Set to
+            GRCh38 be default
         :return: Data for a transcript segment boundary (inter-residue coordinates)
         """
         params = {key: None for key in GenomicTxSeg.model_fields}
@@ -771,6 +779,9 @@ async def _genomic_to_tx_segment(
             genomic_ac_validation = await self.uta_db.validate_genomic_ac(genomic_ac)
             if not genomic_ac_validation:
                 return GenomicTxSeg(errors=[f"{genomic_ac} does not exist in UTA"])
+            if assembly == Assembly.GRCH37:
+                grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
+                genomic_ac = grch38_ac[0]
         else:
             genomic_acs, err_msg = self.seqrepo_access.chromosome_to_acs(chromosome)
 
@@ -780,13 +791,15 @@ async def _genomic_to_tx_segment(
                 )
             genomic_ac = genomic_acs[0]
 
-        # Always liftover to GRCh38
-        genomic_ac, genomic_pos, err_msg = await self._get_grch38_ac_pos(
-            genomic_ac,
-            genomic_pos,
-        )
-        if err_msg:
-            return GenomicTxSeg(errors=[err_msg])
+        # Liftover to GRCh38 if the provided assembly is GRCh37
+        if assembly == Assembly.GRCH37:
+            genomic_pos = await self._get_grch38_pos(genomic_pos, genomic_ac)
+            if not genomic_pos:
+                return GenomicTxSeg(
+                    errors=[
+                        f"Lifting over {genomic_pos} on {genomic_ac} from {Assembly.GRCH37.value} to {Assembly.GRCH38.value} was unsuccessful."
+                    ]
+                )
 
         # Select a transcript if not provided
         if not transcript:
@@ -896,59 +909,23 @@ async def _genomic_to_tx_segment(
             tx_ac=transcript,
         )
 
-    async def _get_grch38_ac_pos(
-        self,
-        genomic_ac: str,
-        genomic_pos: int,
-        grch38_ac: str | None = None,
-    ) -> tuple[str | None, int | None, str | None]:
-        """Get GRCh38 genomic representation for accession and position
-
-        :param genomic_ac: RefSeq genomic accession (GRCh37 or GRCh38 assembly)
-        :param genomic_pos: Genomic position on ``genomic_ac``
-        :param grch38_ac: A valid GRCh38 genomic accession for ``genomic_ac``. If not
-            provided, will attempt to retrieve associated GRCh38 accession from UTA.
-        :return: Tuple containing GRCh38 accession, GRCh38 position, and error message
-            if unable to get GRCh38 representation
-        """
-        # Validate accession exists
-        if not grch38_ac:
-            grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
-            if not grch38_ac:
-                return None, None, f"Unrecognized genomic accession: {genomic_ac}."
-
-            grch38_ac = grch38_ac[0]
-
-        if grch38_ac != genomic_ac:
-            # Ensure genomic_ac is GRCh37
-            chromosome, _ = self.seqrepo_access.translate_identifier(
-                genomic_ac, Assembly.GRCH37.value
-            )
-            if not chromosome:
-                _logger.warning(
-                    "SeqRepo could not find associated %s assembly for genomic accession %s.",
-                    Assembly.GRCH37.value,
-                    genomic_ac,
-                )
-                return (
-                    None,
-                    None,
-                    f"`genomic_ac` must use {Assembly.GRCH37.value} or {Assembly.GRCH38.value} assembly.",
-                )
-            chromosome = chromosome[-1].split(":")[-1]
-            liftover_data = self.liftover.get_liftover(
-                chromosome, genomic_pos, Assembly.GRCH38
-            )
-            if liftover_data is None:
-                return (
-                    None,
-                    None,
-                    f"Lifting over {genomic_pos} on {genomic_ac} from {Assembly.GRCH37.value} to {Assembly.GRCH38.value} was unsuccessful.",
-                )
-            genomic_pos = liftover_data[1]
-            genomic_ac = grch38_ac
+    async def _get_grch38_pos(self, genomic_pos: int, genomic_ac: str) -> int | None:
+        """Liftover a GRCh37 coordinate to GRCh38
 
-        return genomic_ac, genomic_pos, None
+        :param genomic_pos: A genomic coordinate in GRCh37
+        :param genomic_ac: The genomic accession in GRCh38
+        :return The genomic coordinate in GRCh38
+        """
+        chromosome, _ = self.seqrepo_access.translate_identifier(
+            genomic_ac, target_namespaces=Assembly.GRCH38.value
+        )
+        chromosome = chromosome[-1].split(":")[-1]
+        liftover_data = self.liftover.get_liftover(
+            chromosome, genomic_pos, Assembly.GRCH38
+        )
+        if liftover_data is None:
+            return None
+        return liftover_data[1]
 
     async def _validate_gene_coordinates(
         self,
diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py
index a77158f..d259818 100644
--- a/tests/mappers/test_exon_genomic_coords.py
+++ b/tests/mappers/test_exon_genomic_coords.py
@@ -10,6 +10,7 @@
     _ExonCoord,
 )
 from cool_seq_tool.schemas import (
+    Assembly,
     CoordinateType,
     Strand,
 )
@@ -704,48 +705,15 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
 
 
 @pytest.mark.asyncio()
-async def test_get_grch38_ac_pos(test_egc_mapper):
-    """Test that _get_grch38_ac_pos works correctly"""
-    grch38_ac = "NC_000001.11"
-    grch38_pos = 154192135
-    expected = grch38_ac, grch38_pos, None
-
-    # GRCh37 provided
-    grch38_data = await test_egc_mapper._get_grch38_ac_pos("NC_000001.10", 154164611)
-    assert grch38_data == expected
-
-    # GRCh38 provided, no grch38_ac
-    grch38_data = await test_egc_mapper._get_grch38_ac_pos(grch38_ac, grch38_pos)
-    assert grch38_data == expected
-
-    # GRCh38 and grch38_ac provided
-    grch38_data = await test_egc_mapper._get_grch38_ac_pos(
-        grch38_ac, grch38_pos, grch38_ac=grch38_ac
-    )
-    assert grch38_data == expected
-
-    # Unrecognized accession
-    invalid_ac = "NC_0000026.10"
-    grch38_data = await test_egc_mapper._get_grch38_ac_pos(invalid_ac, 154164611)
-    assert grch38_data == (None, None, f"Unrecognized genomic accession: {invalid_ac}.")
-
-    # GRCh36 used
-    grch38_data = await test_egc_mapper._get_grch38_ac_pos("NC_000001.9", 154164611)
-    assert grch38_data == (
-        None,
-        None,
-        "`genomic_ac` must use GRCh37 or GRCh38 assembly.",
-    )
+async def test_get_grch38_pos(test_egc_mapper):
+    """Test that get_grch38_pos works correctly"""
+    genomic_pos = await test_egc_mapper._get_grch38_pos(9609996, "NC_000011.10")
+    assert genomic_pos == 9588449
 
-    # Unsuccessful liftover
-    grch38_data = await test_egc_mapper._get_grch38_ac_pos(
-        "NC_000001.10", 9999999999999999999
-    )
-    assert grch38_data == (
-        None,
-        None,
-        "Lifting over 9999999999999999999 on NC_000001.10 from GRCh37 to GRCh38 was unsuccessful.",
+    genomic_pos = await test_egc_mapper._get_grch38_pos(
+        9609996999999999, "NC_000011.10"
     )
+    assert genomic_pos is None
 
 
 @pytest.mark.asyncio()
@@ -1268,6 +1236,7 @@ async def test_braf(test_egc_mapper, mane_braf):
         "seg_start_genomic": 140501359,  # GRCh38 coords: 140801559
         "seg_end_genomic": 140453136,  # GRCh38 coords: 140753336
         "gene": "BRAF",
+        "assembly": Assembly.GRCH37.value,
     }
     # MANE
     g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
@@ -1291,6 +1260,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
         "seg_start_genomic": 9597639,
         "seg_end_genomic": 9609996,
         "transcript": "NM_003390.3",
+        "assembly": Assembly.GRCH37.value,
     }
     g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(g_to_t_resp, wee1_exon2_exon11)
@@ -1307,6 +1277,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
         "seg_end_genomic": 9609996,
         "transcript": "NM_003390.3",
         "gene": "WEE1",
+        "assembly": Assembly.GRCH37.value,
     }
     g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(g_to_t_resp, wee1_exon2_exon11)
@@ -1322,6 +1293,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
         "seg_start_genomic": 9597639,  # GRCh38 coords: 9576092
         "seg_end_genomic": 9609996,  # GRCh38 coords: 9588449
         "gene": "WEE1",
+        "assembly": Assembly.GRCH37.value,
     }
     g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(g_to_t_resp, mane_wee1_exon2_exon11)
@@ -1433,11 +1405,17 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic):
         "gene": "WEE1",
         "genomic_ac": "NC_000011.9",
         "seg_end_genomic": 9609996,
+        "assembly": Assembly.GRCH37.value,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
 
-    inputs = {"gene": "WEE1", "chromosome": "11", "seg_end_genomic": 9588449}
+    inputs = {
+        "gene": "WEE1",
+        "chromosome": "11",
+        "seg_end_genomic": 9609996,
+        "assembly": Assembly.GRCH37.value,
+    }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
 
@@ -1471,6 +1449,7 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic):
         seg_start_genomic=73442503,
         seg_end_genomic=73457929,  # not on an exon
         gene="ELN",
+        assembly=Assembly.GRCH37.value,
     )
     genomic_tx_seg_service_checks(resp, eln_grch38_intronic)
 

From 18f74d92e80a97b06d538f5dc0ea1981f4091f71 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Mon, 6 Jan 2025 12:09:54 -0500
Subject: [PATCH 08/16] Add more comments

---
 src/cool_seq_tool/mappers/exon_genomic_coords.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index 32725d9..cbb560f 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -840,6 +840,7 @@ async def _genomic_to_tx_segment(
         if not tx_exons:
             return GenomicTxSeg(errors=[f"No exons found given {transcript}"])
 
+        # Determine if genomic_pos needs to be modified
         strand = Strand(tx_exons[0].alt_strand)
         params["strand"] = strand
         use_alt_start_i = self._use_alt_start_i(
@@ -848,7 +849,7 @@ async def _genomic_to_tx_segment(
         if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
             genomic_pos = genomic_pos - 1  # Convert residue coordinate to inter-residue
 
-        # gene is not required to liftover coordinates if tx_ac and genomic_ac are given, but we should set the associated gene
+        # Extract gene symbol given transcript accession
         if not gene:
             _gene, err_msg = await self._get_tx_ac_gene(transcript)
             if err_msg:

From 232673dfb3346491abdd6449d17cedb2c68f2f06 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Mon, 6 Jan 2025 14:01:05 -0500
Subject: [PATCH 09/16] Correct docstring in _validate_gene_coordinates

---
 src/cool_seq_tool/mappers/exon_genomic_coords.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index cbb560f..d8a2afe 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -934,10 +934,8 @@ async def _validate_gene_coordinates(
         genomic_ac: str,
         gene: str,
     ) -> bool:
-        """Get gene given a genomic accession and position.
-
-        If multiple genes are found for a given ``pos`` and ``genomic_ac``, only one
-        gene will be returned.
+        """Validate that a genomic coordinate falls within the first and last exon
+            given a gene and accession
 
         :param pos: Genomic position on ``genomic_ac``
         :param genomic_ac: RefSeq genomic accession, e.g. ``"NC_000007.14"``

From 1c03cacab81f915d70d6d4435e9b4f62e90a9c4b Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Mon, 6 Jan 2025 15:03:11 -0500
Subject: [PATCH 10/16] Update assembly parameter, add chromosome as an
 optional parameter

---
 .../mappers/exon_genomic_coords.py            | 38 +++++++++++--------
 tests/mappers/test_exon_genomic_coords.py     | 19 ++++++----
 2 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index d8a2afe..0e4a842 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -413,7 +413,7 @@ async def genomic_to_tx_segment(
         transcript: str | None = None,
         gene: str | None = None,
         coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
-        assembly: Assembly = Assembly.GRCH38,
+        starting_assembly: Assembly = Assembly.GRCH38,
     ) -> GenomicTxSegService:
         """Get transcript segment data for genomic data, lifted over to GRCh38.
 
@@ -453,8 +453,8 @@ async def genomic_to_tx_segment(
             value is provided.
         :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
             ``seg_end_genomic``. Expects inter-residue coordinates by default
-        :param assembly: The assembly that the supplied coordinate comes from. Set to
-            GRCh38 be default
+        :param starting_assembly: The assembly that the supplied coordinate comes from. Set to
+            GRCh38 by default
         :return: Genomic data (inter-residue coordinates)
         """
         errors = []
@@ -480,7 +480,7 @@ async def genomic_to_tx_segment(
                 gene=gene,
                 is_seg_start=True,
                 coordinate_type=coordinate_type,
-                assembly=assembly,
+                starting_assembly=starting_assembly,
             )
             if start_tx_seg_data.errors:
                 return _return_service_errors(start_tx_seg_data.errors)
@@ -501,7 +501,7 @@ async def genomic_to_tx_segment(
                 gene=gene,
                 is_seg_start=False,
                 coordinate_type=coordinate_type,
-                assembly=assembly,
+                starting_assembly=starting_assembly,
             )
             if end_tx_seg_data.errors:
                 return _return_service_errors(end_tx_seg_data.errors)
@@ -732,7 +732,7 @@ async def _genomic_to_tx_segment(
         gene: str | None = None,
         is_seg_start: bool = True,
         coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
-        assembly: Assembly = Assembly.GRCH38,
+        starting_assembly: Assembly = Assembly.GRCH38,
     ) -> GenomicTxSeg:
         """Given genomic data, generate a boundary for a transcript segment.
 
@@ -755,8 +755,8 @@ async def _genomic_to_tx_segment(
             ``False`` if ``genomic_pos`` is where the transcript segment ends.
         :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
             ``seg_end_genomic``. Expects inter-residue coordinates by default
-        :param assembly: The assembly that the supplied coordinate comes from. Set to
-            GRCh38 be default
+        :param starting_assembly: The assembly that the supplied coordinate comes from. Set to
+            GRCh38 by default
         :return: Data for a transcript segment boundary (inter-residue coordinates)
         """
         params = {key: None for key in GenomicTxSeg.model_fields}
@@ -779,7 +779,7 @@ async def _genomic_to_tx_segment(
             genomic_ac_validation = await self.uta_db.validate_genomic_ac(genomic_ac)
             if not genomic_ac_validation:
                 return GenomicTxSeg(errors=[f"{genomic_ac} does not exist in UTA"])
-            if assembly == Assembly.GRCH37:
+            if starting_assembly == Assembly.GRCH37:
                 grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
                 genomic_ac = grch38_ac[0]
         else:
@@ -792,8 +792,10 @@ async def _genomic_to_tx_segment(
             genomic_ac = genomic_acs[0]
 
         # Liftover to GRCh38 if the provided assembly is GRCh37
-        if assembly == Assembly.GRCH37:
-            genomic_pos = await self._get_grch38_pos(genomic_pos, genomic_ac)
+        if starting_assembly == Assembly.GRCH37:
+            genomic_pos = await self._get_grch38_pos(
+                genomic_pos, genomic_ac, chromosome=chromosome if chromosome else None
+            )
             if not genomic_pos:
                 return GenomicTxSeg(
                     errors=[
@@ -910,17 +912,21 @@ async def _genomic_to_tx_segment(
             tx_ac=transcript,
         )
 
-    async def _get_grch38_pos(self, genomic_pos: int, genomic_ac: str) -> int | None:
+    async def _get_grch38_pos(
+        self, genomic_pos: int, genomic_ac: str, chromosome: str | None = None
+    ) -> int | None:
         """Liftover a GRCh37 coordinate to GRCh38
 
         :param genomic_pos: A genomic coordinate in GRCh37
         :param genomic_ac: The genomic accession in GRCh38
+        :param chromosome: The chromosome that genomic_pos occurs on
         :return The genomic coordinate in GRCh38
         """
-        chromosome, _ = self.seqrepo_access.translate_identifier(
-            genomic_ac, target_namespaces=Assembly.GRCH38.value
-        )
-        chromosome = chromosome[-1].split(":")[-1]
+        if not chromosome:
+            chromosome, _ = self.seqrepo_access.translate_identifier(
+                genomic_ac, target_namespaces=Assembly.GRCH38.value
+            )
+            chromosome = chromosome[-1].split(":")[-1]
         liftover_data = self.liftover.get_liftover(
             chromosome, genomic_pos, Assembly.GRCH38
         )
diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py
index d259818..4566068 100644
--- a/tests/mappers/test_exon_genomic_coords.py
+++ b/tests/mappers/test_exon_genomic_coords.py
@@ -710,6 +710,11 @@ async def test_get_grch38_pos(test_egc_mapper):
     genomic_pos = await test_egc_mapper._get_grch38_pos(9609996, "NC_000011.10")
     assert genomic_pos == 9588449
 
+    genomic_pos = await test_egc_mapper._get_grch38_pos(
+        9609996, "NC_000011.10", "chr11"
+    )
+    assert genomic_pos == 9588449
+
     genomic_pos = await test_egc_mapper._get_grch38_pos(
         9609996999999999, "NC_000011.10"
     )
@@ -1236,7 +1241,7 @@ async def test_braf(test_egc_mapper, mane_braf):
         "seg_start_genomic": 140501359,  # GRCh38 coords: 140801559
         "seg_end_genomic": 140453136,  # GRCh38 coords: 140753336
         "gene": "BRAF",
-        "assembly": Assembly.GRCH37.value,
+        "starting_assembly": Assembly.GRCH37.value,
     }
     # MANE
     g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
@@ -1260,7 +1265,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
         "seg_start_genomic": 9597639,
         "seg_end_genomic": 9609996,
         "transcript": "NM_003390.3",
-        "assembly": Assembly.GRCH37.value,
+        "starting_assembly": Assembly.GRCH37.value,
     }
     g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(g_to_t_resp, wee1_exon2_exon11)
@@ -1277,7 +1282,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
         "seg_end_genomic": 9609996,
         "transcript": "NM_003390.3",
         "gene": "WEE1",
-        "assembly": Assembly.GRCH37.value,
+        "starting_assembly": Assembly.GRCH37.value,
     }
     g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(g_to_t_resp, wee1_exon2_exon11)
@@ -1293,7 +1298,7 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
         "seg_start_genomic": 9597639,  # GRCh38 coords: 9576092
         "seg_end_genomic": 9609996,  # GRCh38 coords: 9588449
         "gene": "WEE1",
-        "assembly": Assembly.GRCH37.value,
+        "starting_assembly": Assembly.GRCH37.value,
     }
     g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(g_to_t_resp, mane_wee1_exon2_exon11)
@@ -1405,7 +1410,7 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic):
         "gene": "WEE1",
         "genomic_ac": "NC_000011.9",
         "seg_end_genomic": 9609996,
-        "assembly": Assembly.GRCH37.value,
+        "starting_assembly": Assembly.GRCH37.value,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
@@ -1414,7 +1419,7 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic):
         "gene": "WEE1",
         "chromosome": "11",
         "seg_end_genomic": 9609996,
-        "assembly": Assembly.GRCH37.value,
+        "starting_assembly": Assembly.GRCH37.value,
     }
     resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     assert all((resp.gene, resp.genomic_ac, resp.tx_ac, resp.seg_end))
@@ -1449,7 +1454,7 @@ async def test_valid_inputs(test_egc_mapper, eln_grch38_intronic):
         seg_start_genomic=73442503,
         seg_end_genomic=73457929,  # not on an exon
         gene="ELN",
-        assembly=Assembly.GRCH37.value,
+        starting_assembly=Assembly.GRCH37.value,
     )
     genomic_tx_seg_service_checks(resp, eln_grch38_intronic)
 

From 4b336ba923554a7895e2d0457a53a60556917c0f Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Tue, 7 Jan 2025 13:23:04 -0500
Subject: [PATCH 11/16] Ensure merge conflicts are resolved

---
 src/cool_seq_tool/mappers/exon_genomic_coords.py | 10 ++++++----
 tests/mappers/test_exon_genomic_coords.py        |  6 +++---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index cf77003..ddf5e79 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -780,7 +780,9 @@ async def _genomic_to_tx_segment(
         if genomic_ac:
             genomic_ac_validation = await self.uta_db.validate_genomic_ac(genomic_ac)
             if not genomic_ac_validation:
-                return GenomicTxSeg(errors=[f"{genomic_ac} does not exist in UTA"])
+                return GenomicTxSeg(
+                    errors=[f"Genomic accession does not exist in UTA: {genomic_ac}"]
+                )
             if starting_assembly == Assembly.GRCH37:
                 grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
                 genomic_ac = grch38_ac[0]
@@ -796,7 +798,7 @@ async def _genomic_to_tx_segment(
         # Liftover to GRCh38 if the provided assembly is GRCh37
         if starting_assembly == Assembly.GRCH37:
             genomic_pos = await self._get_grch38_pos(
-                genomic_pos, genomic_ac, chromosome=chromosome if chromosome else None
+                genomic_ac, genomic_pos, chromosome=chromosome if chromosome else None
             )
             if not genomic_pos:
                 return GenomicTxSeg(
@@ -916,12 +918,12 @@ async def _genomic_to_tx_segment(
             ),
         )
 
-    async def _get_grch38_ac_pos(
+    async def _get_grch38_pos(
         self,
         genomic_ac: str,
         genomic_pos: int,
         chromosome: str | None = None,
-    ) -> tuple[str | None, int | None, str | None]:
+    ) -> int | None:
         """Get GRCh38 genomic representation for accession and position
 
         :param genomic_pos: A genomic coordinate in GRCh37
diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py
index f46625d..c1d57ba 100644
--- a/tests/mappers/test_exon_genomic_coords.py
+++ b/tests/mappers/test_exon_genomic_coords.py
@@ -707,16 +707,16 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
 @pytest.mark.asyncio()
 async def test_get_grch38_pos(test_egc_mapper):
     """Test that get_grch38_pos works correctly"""
-    genomic_pos = await test_egc_mapper._get_grch38_pos(9609996, "NC_000011.10")
+    genomic_pos = await test_egc_mapper._get_grch38_pos("NC_000011.10", 9609996)
     assert genomic_pos == 9588449
 
     genomic_pos = await test_egc_mapper._get_grch38_pos(
-        9609996, "NC_000011.10", "chr11"
+        "NC_000011.10", 9609996, "chr11"
     )
     assert genomic_pos == 9588449
 
     genomic_pos = await test_egc_mapper._get_grch38_pos(
-        9609996999999999, "NC_000011.10"
+        "NC_000011.10", 9609996999999999
     )
     assert genomic_pos is None
 

From 44e1f34454f0ccefd5128a1ba866d7af77365c56 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Tue, 7 Jan 2025 17:06:36 -0500
Subject: [PATCH 12/16] Add docstring, change liftover data access

---
 .../mappers/exon_genomic_coords.py              |  8 +++-----
 tests/mappers/test_exon_genomic_coords.py       | 17 -----------------
 2 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index ddf5e79..a2d9893 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -454,7 +454,7 @@ async def genomic_to_tx_segment(
         :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
             ``seg_end_genomic``. Expects inter-residue coordinates by default
         :param starting_assembly: The assembly that the supplied coordinate comes from. Set to
-            GRCh38 by default
+            GRCh38 by default. Will attempt to liftover if starting assembly is GRCh37
         :return: Genomic data (inter-residue coordinates)
         """
         errors = []
@@ -759,7 +759,7 @@ async def _genomic_to_tx_segment(
         :param coordinate_type: Coordinate type for ``seg_start_genomic`` and
             ``seg_end_genomic``. Expects inter-residue coordinates by default
         :param starting_assembly: The assembly that the supplied coordinate comes from. Set to
-            GRCh38 by default
+            GRCh38 by default. Will attempt to liftover if starting assembly is GRCh37
         :return: Data for a transcript segment boundary (inter-residue coordinates)
         """
         params = {key: None for key in GenomicTxSeg.model_fields}
@@ -939,9 +939,7 @@ async def _get_grch38_pos(
         liftover_data = self.liftover.get_liftover(
             chromosome, genomic_pos, Assembly.GRCH38
         )
-        if liftover_data is None:
-            return None
-        return liftover_data[1]
+        return liftover_data[1] if liftover_data else None
 
     async def _validate_gene_coordinates(
         self,
diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py
index c1d57ba..cb2f8e5 100644
--- a/tests/mappers/test_exon_genomic_coords.py
+++ b/tests/mappers/test_exon_genomic_coords.py
@@ -704,23 +704,6 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
         assert len(actual.errors) > 0
 
 
-@pytest.mark.asyncio()
-async def test_get_grch38_pos(test_egc_mapper):
-    """Test that get_grch38_pos works correctly"""
-    genomic_pos = await test_egc_mapper._get_grch38_pos("NC_000011.10", 9609996)
-    assert genomic_pos == 9588449
-
-    genomic_pos = await test_egc_mapper._get_grch38_pos(
-        "NC_000011.10", 9609996, "chr11"
-    )
-    assert genomic_pos == 9588449
-
-    genomic_pos = await test_egc_mapper._get_grch38_pos(
-        "NC_000011.10", 9609996999999999
-    )
-    assert genomic_pos is None
-
-
 @pytest.mark.asyncio()
 async def test_get_all_exon_coords(
     test_egc_mapper, nm_152263_exons, nm_152263_exons_genomic_coords

From 0734fbdb2b470422f23ab696d103fbf771e67768 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Wed, 8 Jan 2025 09:19:13 -0500
Subject: [PATCH 13/16] Remove duplicate error statement

---
 tests/mappers/test_exon_genomic_coords.py | 28 +++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py
index cb2f8e5..d5dcfc8 100644
--- a/tests/mappers/test_exon_genomic_coords.py
+++ b/tests/mappers/test_exon_genomic_coords.py
@@ -1221,8 +1221,8 @@ async def test_braf(test_egc_mapper, mane_braf):
     """
     inputs = {
         "genomic_ac": "NC_000007.13",
-        "seg_start_genomic": 140501359,  # GRCh38 coords: 140801559
-        "seg_end_genomic": 140453136,  # GRCh38 coords: 140753336
+        "seg_start_genomic": 140501359,
+        "seg_end_genomic": 140453136,
         "gene": "BRAF",
         "starting_assembly": Assembly.GRCH37.value,
     }
@@ -1230,6 +1230,16 @@ async def test_braf(test_egc_mapper, mane_braf):
     g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(g_to_t_resp, mane_braf)
 
+    inputs = {
+        "genomic_ac": "NC_000007.14",
+        "seg_start_genomic": 140801559,
+        "seg_end_genomic": 140753336,
+        "gene": "BRAF",
+        "starting_assembly": Assembly.GRCH38.value,
+    }
+    g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
+    genomic_tx_seg_service_checks(g_to_t_resp, mane_braf)
+
     expected = mane_braf.model_copy(deep=True)
     expected.seg_start.genomic_location.end = 140801559
     t_to_g_resp = await test_egc_mapper.tx_segment_to_genomic(
@@ -1278,14 +1288,24 @@ async def test_wee1(test_egc_mapper, wee1_exon2_exon11, mane_wee1_exon2_exon11):
     # MANE since no transcript provided
     inputs = {
         "genomic_ac": "NC_000011.9",
-        "seg_start_genomic": 9597639,  # GRCh38 coords: 9576092
-        "seg_end_genomic": 9609996,  # GRCh38 coords: 9588449
+        "seg_start_genomic": 9597639,
+        "seg_end_genomic": 9609996,
         "gene": "WEE1",
         "starting_assembly": Assembly.GRCH37.value,
     }
     g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
     genomic_tx_seg_service_checks(g_to_t_resp, mane_wee1_exon2_exon11)
 
+    inputs = {
+        "genomic_ac": "NC_000011.10",
+        "seg_start_genomic": 9576092,
+        "seg_end_genomic": 9588449,
+        "gene": "WEE1",
+        "starting_assembly": Assembly.GRCH38.value,
+    }
+    g_to_t_resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
+    genomic_tx_seg_service_checks(g_to_t_resp, mane_wee1_exon2_exon11)
+
     t_to_g_resp = await test_egc_mapper.tx_segment_to_genomic(
         **get_t_to_g_args(g_to_t_resp)
     )

From 39b4a75ef3e889181b0d5b39d500946488900ba9 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Wed, 8 Jan 2025 10:50:28 -0500
Subject: [PATCH 14/16] Update logic for genomic accession validation

---
 src/cool_seq_tool/mappers/exon_genomic_coords.py | 9 ++++-----
 tests/mappers/test_exon_genomic_coords.py        | 4 ++--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index a2d9893..dd4cdb3 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -778,14 +778,13 @@ async def _genomic_to_tx_segment(
                 )
 
         if genomic_ac:
-            genomic_ac_validation = await self.uta_db.validate_genomic_ac(genomic_ac)
-            if not genomic_ac_validation:
+            grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
+            if grch38_ac:
+                genomic_ac = grch38_ac[0]
+            else:
                 return GenomicTxSeg(
                     errors=[f"Genomic accession does not exist in UTA: {genomic_ac}"]
                 )
-            if starting_assembly == Assembly.GRCH37:
-                grch38_ac = await self.uta_db.get_newest_assembly_ac(genomic_ac)
-                genomic_ac = grch38_ac[0]
         else:
             genomic_acs, err_msg = self.seqrepo_access.chromosome_to_acs(chromosome)
 
diff --git a/tests/mappers/test_exon_genomic_coords.py b/tests/mappers/test_exon_genomic_coords.py
index d5dcfc8..57f277c 100644
--- a/tests/mappers/test_exon_genomic_coords.py
+++ b/tests/mappers/test_exon_genomic_coords.py
@@ -1499,13 +1499,13 @@ async def test_invalid(test_egc_mapper):
 
     # Invalid accession
     resp = await test_egc_mapper.genomic_to_tx_segment(
-        genomic_ac="NC_000001.200",
+        genomic_ac="NC_000035.200",
         seg_start_genomic=154191901,
         seg_end_genomic=154192135,
         transcript="NM_152263.3",
     )
     genomic_tx_seg_service_checks(resp, is_valid=False)
-    assert resp.errors == ["Genomic accession does not exist in UTA: NC_000001.200"]
+    assert resp.errors == ["Genomic accession does not exist in UTA: NC_000035.200"]
 
     # Invalid coordinates
     resp = await test_egc_mapper.genomic_to_tx_segment(

From 06244a91d3e09bc2bc3680d5d10abe00cbd04e22 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Wed, 8 Jan 2025 11:57:59 -0500
Subject: [PATCH 15/16] Update docstring

---
 src/cool_seq_tool/mappers/exon_genomic_coords.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index dd4cdb3..46f89bb 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -442,7 +442,8 @@ async def genomic_to_tx_segment(
             used.
         :param genomic_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
             must provide ``chromosome. If ``chromosome`` is also provided,
-            ``genomic_ac`` will be used.
+            ``genomic_ac`` will be used. If the genomic accession is from GRCh37, it
+            will be lifted over to GRCh38
         :param seg_start_genomic: Genomic position where the transcript segment starts
         :param seg_end_genomic: Genomic position where the transcript segment ends
         :param transcript: The transcript to use. If this is not given, we will try the
@@ -749,7 +750,8 @@ async def _genomic_to_tx_segment(
             If ``genomic_ac`` is also provided, ``genomic_ac`` will be used.
         :param genomic_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
             must provide ``chromosome. If ``chromosome`` is also provided, ``genomic_ac``
-            will be used.
+            will be used. If the genomic accession is from GRCh37, it will be lifted
+            over to GRCh38
         :param transcript: The transcript to use. If this is not given, we will try the
             following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
             Compatible Transcript

From 2f6b224be1121a6c2bd058cea6b8458914a31ba3 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Wed, 8 Jan 2025 12:01:34 -0500
Subject: [PATCH 16/16] Indicate that accession version will be ignored

---
 src/cool_seq_tool/mappers/exon_genomic_coords.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/cool_seq_tool/mappers/exon_genomic_coords.py b/src/cool_seq_tool/mappers/exon_genomic_coords.py
index 46f89bb..40756fb 100644
--- a/src/cool_seq_tool/mappers/exon_genomic_coords.py
+++ b/src/cool_seq_tool/mappers/exon_genomic_coords.py
@@ -443,7 +443,8 @@ async def genomic_to_tx_segment(
         :param genomic_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
             must provide ``chromosome. If ``chromosome`` is also provided,
             ``genomic_ac`` will be used. If the genomic accession is from GRCh37, it
-            will be lifted over to GRCh38
+            will be lifted over to GRCh38 and the original accession version will be
+            ignored
         :param seg_start_genomic: Genomic position where the transcript segment starts
         :param seg_end_genomic: Genomic position where the transcript segment ends
         :param transcript: The transcript to use. If this is not given, we will try the
@@ -751,7 +752,7 @@ async def _genomic_to_tx_segment(
         :param genomic_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
             must provide ``chromosome. If ``chromosome`` is also provided, ``genomic_ac``
             will be used. If the genomic accession is from GRCh37, it will be lifted
-            over to GRCh38
+            over to GRCh38 and the original accession version will be ignored
         :param transcript: The transcript to use. If this is not given, we will try the
             following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
             Compatible Transcript