Skip to content

Commit

Permalink
issue biocommons#606 - reformat code
Browse files Browse the repository at this point in the history
  • Loading branch information
kayleeyuhas committed Dec 23, 2020
1 parent 3bb89f7 commit 5d579a8
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 65 deletions.
148 changes: 85 additions & 63 deletions hgvs/variantmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,11 @@ class VariantMapper(object):
:class:`hgvs.sequencevariant.SequenceVariant`.
"""

def __init__(self,
hdp,
replace_reference=hgvs.global_config.mapping.replace_reference,
prevalidation_level=hgvs.global_config.mapping.prevalidation_level,
add_gene_symbol=hgvs.global_config.mapping.add_gene_symbol
):
add_gene_symbol=hgvs.global_config.mapping.add_gene_symbol):
"""
:param bool replace_reference: replace reference (entails additional network access)
:param str prevalidation_level: None or Intrinsic or Extrinsic validation before mapping
Expand All @@ -93,8 +91,9 @@ def __init__(self,
self._validator = hgvs.validator.IntrinsicValidator(strict=False)
else:
self._validator = hgvs.validator.Validator(self.hdp, strict=False)
self.left_normalizer = hgvs.normalizer.Normalizer(hdp, shuffle_direction=5, variantmapper=self)

self.left_normalizer = hgvs.normalizer.Normalizer(hdp,
shuffle_direction=5,
variantmapper=self)

# ############################################################################
# g⟷t
Expand All @@ -104,14 +103,19 @@ def g_to_t(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln
if self._validator:
self._validator.validate(var_g)
var_g.fill_ref(self.hdp)
mapper = self._fetch_AlignmentMapper(
tx_ac=tx_ac, alt_ac=var_g.ac, alt_aln_method=alt_aln_method)
mapper = self._fetch_AlignmentMapper(tx_ac=tx_ac,
alt_ac=var_g.ac,
alt_aln_method=alt_aln_method)
if mapper.is_coding_transcript:
var_out = VariantMapper.g_to_c(
self, var_g=var_g, tx_ac=tx_ac, alt_aln_method=alt_aln_method)
var_out = VariantMapper.g_to_c(self,
var_g=var_g,
tx_ac=tx_ac,
alt_aln_method=alt_aln_method)
else:
var_out = VariantMapper.g_to_n(
self, var_g=var_g, tx_ac=tx_ac, alt_aln_method=alt_aln_method)
var_out = VariantMapper.g_to_n(self,
var_g=var_g,
tx_ac=tx_ac,
alt_aln_method=alt_aln_method)
return var_out

def t_to_g(self, var_t, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln_method):
Expand All @@ -120,14 +124,19 @@ def t_to_g(self, var_t, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al
if self._validator:
self._validator.validate(var_t)
var_t.fill_ref(self.hdp)
mapper = self._fetch_AlignmentMapper(
tx_ac=var_t.ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method)
mapper = self._fetch_AlignmentMapper(tx_ac=var_t.ac,
alt_ac=alt_ac,
alt_aln_method=alt_aln_method)
if var_t.type == "c":
var_out = VariantMapper.c_to_g(
self, var_c=var_t, alt_ac=alt_ac, alt_aln_method=alt_aln_method)
var_out = VariantMapper.c_to_g(self,
var_c=var_t,
alt_ac=alt_ac,
alt_aln_method=alt_aln_method)
else:
var_out = VariantMapper.n_to_g(
self, var_n=var_t, alt_ac=alt_ac, alt_aln_method=alt_aln_method)
var_out = VariantMapper.n_to_g(self,
var_n=var_t,
alt_ac=alt_ac,
alt_aln_method=alt_aln_method)
return var_out

# ############################################################################
Expand All @@ -149,12 +158,12 @@ def g_to_n(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln
raise HGVSInvalidVariantError("Expected a g. variant; got " + str(var_g))
if self._validator:
self._validator.validate(var_g)
mapper = self._fetch_AlignmentMapper(
tx_ac=tx_ac, alt_ac=var_g.ac, alt_aln_method=alt_aln_method)
mapper = self._fetch_AlignmentMapper(tx_ac=tx_ac,
alt_ac=var_g.ac,
alt_aln_method=alt_aln_method)

if (mapper.strand == -1
and not hgvs.global_config.mapping.strict_bounds
and not mapper.g_interval_is_inbounds(var_g.posedit.pos)):
if (mapper.strand == -1 and not hgvs.global_config.mapping.strict_bounds
and not mapper.g_interval_is_inbounds(var_g.posedit.pos)):
_logger.info("Renormalizing out-of-bounds minus strand variant on genomic sequence")
var_g = self.left_normalizer.normalize(var_g)

Expand All @@ -169,14 +178,14 @@ def g_to_n(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln
else:
# variant at alignment gap
pos_g = mapper.n_to_g(pos_n)
edit_n = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_g, var_g))
edit_n = hgvs.edit.NARefAlt(ref='',
alt=self._get_altered_sequence(mapper.strand, pos_g, var_g))
pos_n.uncertain = var_g.posedit.pos.uncertain
var_n = hgvs.sequencevariant.SequenceVariant(
ac=tx_ac, type="n", posedit=hgvs.posedit.PosEdit(pos_n, edit_n))
if (self.replace_reference
and var_n.posedit.pos.start.base >= 0
and var_n.posedit.pos.end.base < mapper.tgt_len):
var_n = hgvs.sequencevariant.SequenceVariant(ac=tx_ac,
type="n",
posedit=hgvs.posedit.PosEdit(pos_n, edit_n))
if (self.replace_reference and var_n.posedit.pos.start.base >= 0
and var_n.posedit.pos.end.base < mapper.tgt_len):
self._replace_reference(var_n)
if self.add_gene_symbol:
self._update_gene_symbol(var_n, var_g.gene)
Expand All @@ -200,8 +209,9 @@ def n_to_g(self, var_n, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al
if self._validator:
self._validator.validate(var_n)
var_n.fill_ref(self.hdp)
mapper = self._fetch_AlignmentMapper(
tx_ac=var_n.ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method)
mapper = self._fetch_AlignmentMapper(tx_ac=var_n.ac,
alt_ac=alt_ac,
alt_aln_method=alt_aln_method)
pos_g = mapper.n_to_g(var_n.posedit.pos)
if not pos_g.uncertain:
edit_g = self._convert_edit_check_strand(mapper.strand, var_n.posedit.edit)
Expand All @@ -212,11 +222,12 @@ def n_to_g(self, var_n, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al
else:
# variant at alignment gap
pos_n = mapper.g_to_n(pos_g)
edit_g = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_n, var_n))
edit_g = hgvs.edit.NARefAlt(ref='',
alt=self._get_altered_sequence(mapper.strand, pos_n, var_n))
pos_g.uncertain = var_n.posedit.pos.uncertain
var_g = hgvs.sequencevariant.SequenceVariant(
ac=alt_ac, type="g", posedit=hgvs.posedit.PosEdit(pos_g, edit_g))
var_g = hgvs.sequencevariant.SequenceVariant(ac=alt_ac,
type="g",
posedit=hgvs.posedit.PosEdit(pos_g, edit_g))
if self.replace_reference:
self._replace_reference(var_g)
# No gene symbol for g. variants (actually, *should* for NG, but no way to distinguish)
Expand All @@ -242,8 +253,9 @@ def g_to_c(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln
if self._validator:
self._validator.validate(var_g)
var_g.fill_ref(self.hdp)
mapper = self._fetch_AlignmentMapper(
tx_ac=tx_ac, alt_ac=var_g.ac, alt_aln_method=alt_aln_method)
mapper = self._fetch_AlignmentMapper(tx_ac=tx_ac,
alt_ac=var_g.ac,
alt_aln_method=alt_aln_method)
pos_c = mapper.g_to_c(var_g.posedit.pos)
if not pos_c.uncertain:
edit_c = self._convert_edit_check_strand(mapper.strand, var_g.posedit.edit)
Expand All @@ -254,11 +266,12 @@ def g_to_c(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln
else:
# variant at alignment gap
pos_g = mapper.c_to_g(pos_c)
edit_c = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_g, var_g))
edit_c = hgvs.edit.NARefAlt(ref='',
alt=self._get_altered_sequence(mapper.strand, pos_g, var_g))
pos_c.uncertain = var_g.posedit.pos.uncertain
var_c = hgvs.sequencevariant.SequenceVariant(
ac=tx_ac, type="c", posedit=hgvs.posedit.PosEdit(pos_c, edit_c))
var_c = hgvs.sequencevariant.SequenceVariant(ac=tx_ac,
type="c",
posedit=hgvs.posedit.PosEdit(pos_c, edit_c))
if self.replace_reference:
self._replace_reference(var_c)
if self.add_gene_symbol:
Expand All @@ -283,8 +296,9 @@ def c_to_g(self, var_c, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al
if self._validator:
self._validator.validate(var_c)
var_c.fill_ref(self.hdp)
mapper = self._fetch_AlignmentMapper(
tx_ac=var_c.ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method)
mapper = self._fetch_AlignmentMapper(tx_ac=var_c.ac,
alt_ac=alt_ac,
alt_aln_method=alt_aln_method)
pos_g = mapper.c_to_g(var_c.posedit.pos)
if not pos_g.uncertain:
edit_g = self._convert_edit_check_strand(mapper.strand, var_c.posedit.edit)
Expand All @@ -298,11 +312,12 @@ def c_to_g(self, var_c, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_al
var_n.posedit.pos = mapper.c_to_n(var_c.posedit.pos)
var_n.type = 'n'
pos_n = mapper.g_to_n(pos_g)
edit_g = hgvs.edit.NARefAlt(
ref='', alt=self._get_altered_sequence(mapper.strand, pos_n, var_n))
edit_g = hgvs.edit.NARefAlt(ref='',
alt=self._get_altered_sequence(mapper.strand, pos_n, var_n))
pos_g.uncertain = var_c.posedit.pos.uncertain
var_g = hgvs.sequencevariant.SequenceVariant(
ac=alt_ac, type="g", posedit=hgvs.posedit.PosEdit(pos_g, edit_g))
var_g = hgvs.sequencevariant.SequenceVariant(ac=alt_ac,
type="g",
posedit=hgvs.posedit.PosEdit(pos_g, edit_g))
if self.replace_reference:
self._replace_reference(var_g)
return var_g
Expand All @@ -325,8 +340,9 @@ def c_to_n(self, var_c):
if self._validator:
self._validator.validate(var_c)
var_c.fill_ref(self.hdp)
mapper = self._fetch_AlignmentMapper(
tx_ac=var_c.ac, alt_ac=var_c.ac, alt_aln_method="transcript")
mapper = self._fetch_AlignmentMapper(tx_ac=var_c.ac,
alt_ac=var_c.ac,
alt_aln_method="transcript")
pos_n = mapper.c_to_n(var_c.posedit.pos)
if (isinstance(var_c.posedit.edit, hgvs.edit.NARefAlt)
or isinstance(var_c.posedit.edit, hgvs.edit.Dup)
Expand All @@ -335,8 +351,9 @@ def c_to_n(self, var_c):
else:
raise HGVSUnsupportedOperationError(
"Only NARefAlt/Dup/Inv types are currently implemented")
var_n = hgvs.sequencevariant.SequenceVariant(
ac=var_c.ac, type="n", posedit=hgvs.posedit.PosEdit(pos_n, edit_n))
var_n = hgvs.sequencevariant.SequenceVariant(ac=var_c.ac,
type="n",
posedit=hgvs.posedit.PosEdit(pos_n, edit_n))
if self.replace_reference:
self._replace_reference(var_n)
if self.add_gene_symbol:
Expand All @@ -359,8 +376,9 @@ def n_to_c(self, var_n):
if self._validator:
self._validator.validate(var_n)
var_n.fill_ref(self.hdp)
mapper = self._fetch_AlignmentMapper(
tx_ac=var_n.ac, alt_ac=var_n.ac, alt_aln_method="transcript")
mapper = self._fetch_AlignmentMapper(tx_ac=var_n.ac,
alt_ac=var_n.ac,
alt_aln_method="transcript")
pos_c = mapper.n_to_c(var_n.posedit.pos)
if (isinstance(var_n.posedit.edit, hgvs.edit.NARefAlt)
or isinstance(var_n.posedit.edit, hgvs.edit.Dup)
Expand All @@ -369,8 +387,9 @@ def n_to_c(self, var_n):
else:
raise HGVSUnsupportedOperationError(
"Only NARefAlt/Dup/Inv types are currently implemented")
var_c = hgvs.sequencevariant.SequenceVariant(
ac=var_n.ac, type="c", posedit=hgvs.posedit.PosEdit(pos_c, edit_c))
var_c = hgvs.sequencevariant.SequenceVariant(ac=var_n.ac,
type="c",
posedit=hgvs.posedit.PosEdit(pos_c, edit_c))
if self.replace_reference:
self._replace_reference(var_c)
if self.add_gene_symbol:
Expand Down Expand Up @@ -436,15 +455,16 @@ def _replace_reference(self, var):

# For c. variants, we need coords on underlying sequences
if var.type == "c":
mapper = self._fetch_AlignmentMapper(
tx_ac=var.ac, alt_ac=var.ac, alt_aln_method="transcript")
mapper = self._fetch_AlignmentMapper(tx_ac=var.ac,
alt_ac=var.ac,
alt_aln_method="transcript")
pos = mapper.c_to_n(var.posedit.pos)
else:
pos = var.posedit.pos

seq_start = pos.start.base - 1
seq_end = pos.end.base

# When strict_bounds is False and an error occurs, return
# variant as-is

Expand All @@ -460,8 +480,8 @@ def _replace_reference(self, var):

edit = var.posedit.edit
if edit.ref != seq:
_logger.debug("Replaced reference sequence in {var} with {seq}".format(
var=var, seq=seq))
_logger.debug("Replaced reference sequence in {var} with {seq}".format(var=var,
seq=seq))
edit.ref = seq

return var
Expand All @@ -472,8 +492,10 @@ def _fetch_AlignmentMapper(self, tx_ac, alt_ac, alt_aln_method):
Get a new AlignmentMapper for the given transcript accession (ac),
possibly caching the result.
"""
return hgvs.alignmentmapper.AlignmentMapper(
self.hdp, tx_ac=tx_ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method)
return hgvs.alignmentmapper.AlignmentMapper(self.hdp,
tx_ac=tx_ac,
alt_ac=alt_ac,
alt_aln_method=alt_aln_method)

@staticmethod
def _convert_edit_check_strand(strand, edit_in):
Expand Down Expand Up @@ -551,7 +573,7 @@ def _get_altered_sequence(self, strand, interval, var):
def _update_gene_symbol(self, var, symbol):
# Store a gene symbol on `var` (as var.gene) and return the same `var` object.
# When `symbol` is falsy, the value is looked up instead via
# self.hdp.get_tx_identity_info(var.ac).get("hgnc", None).
if not symbol:
symbol = self.hdp.get_tx_identity_info(var.ac).get("hgnc", None)
# NOTE(review): the assignment below is listed twice because this text is a
# rendered diff hunk (the removed line followed by its replacement). The two
# lines read identically here, so the change was presumably whitespace-only;
# confirm the intended indentation against the repository.
var.gene = symbol
var.gene = symbol
return var


Expand Down
2 changes: 0 additions & 2 deletions tests/issues/test_606.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@


class TestIssue606(unittest.TestCase):

def test_606(self):
"""https://github.com/biocommons/hgvs/issues/606"""

from hgvs.easy import am37, parser

"""
Occasionally, an IndexError is thrown by the _get_altered_sequence method. This seems to occur
when there is either inconsistent data for a transcript in UTA or an invalid variant input.
Expand Down

0 comments on commit 5d579a8

Please sign in to comment.