Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for right-shifting deletions on AA sequences. #512

Merged
merged 1 commit into from
Jun 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
* Switching to Github Workflows for continuous integration.
* Bumping a couple of dependencies.

### jannovar-core

* Apply fix for (#498, PR #499 by @roland-ewald of @limbus-medtec).
This fixes a problem with right-shifting deletions on amino acid sequences.
See the tickets and merge request for details.

## v0.34

### jannovar-cli
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@ public CDSExonicAnnotationBuilder() {
wtAASeq.substring(refChangeBeginPos.getPos() / 3, Math.min((refChangeLastPos.getPos() + 1 + 2) / 3, wtAASeq.length())),
varAASeq.substring(varChangeBeginPos.getPos() / 3, Math.min((varChangeLastPos.getPos() + 1 + 2) / 3, varAASeq.length())));

// Shift change in case it is by chance synonymous
this.aaChange = AminoAcidChangeNormalizer.shiftSynonymousChange(this.aaChange, wtAASeq, varAASeq);

// Look for stop codon, starting at change position.
this.varAAStopPos = varAASeq.indexOf('*', refChangeBeginPos.getPos() / 3);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,7 @@ public CDSExonicAnnotationBuilder() {
final int varAAEndPos = Math.min(changeBeginPos.getPos() / 3 + delta, varAASeq.length());
final String insAA = varAASeq.substring(changeBeginPos.getPos() / 3, varAAEndPos);
this.aaChange = new AminoAcidChange(changeBeginPos.getPos() / 3, delAA, insAA);
this.aaChange = AminoAcidChangeNormalizer.truncateBothSides(this.aaChange);
this.aaChange = AminoAcidChangeNormalizer.normalizeDeletion(wtAASeq, this.aaChange);
this.aaChange = AminoAcidChangeNormalizer.shiftSynonymousChange(this.aaChange, wtAASeq, varAASeq);
}

public Annotation build() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,7 @@ public CDSExonicAnnotationBuilder() {
final String insertAA = varAASeq.substring(insertAAPos, insertAAPos + insertAALength);
this.aaChange = new AminoAcidChange(insertAAPos, delAA, insertAA);
this.aaChange = AminoAcidChangeNormalizer.truncateAltAfterStopCodon(aaChange);
this.aaChange = AminoAcidChangeNormalizer.truncateBothSides(aaChange);
this.aaChange = AminoAcidChangeNormalizer.shiftInsertion(aaChange, wtAASeq);
this.aaChange = AminoAcidChangeNormalizer.shiftSynonymousChange(aaChange, wtAASeq, varAASeq);
// Obtain amino acid insertion position.
this.varAAInsertPos = this.aaChange.getPos();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
*/
public final class AminoAcidChangeNormalizer {

/** Utility class that only offers static helpers; it should not be instantiated. */
private AminoAcidChangeNormalizer() {}

/**
* Search for stop codon in <code>change.alt</code> and truncate afterwards.
*
Expand All @@ -21,44 +24,44 @@ public static AminoAcidChange truncateAltAfterStopCodon(AminoAcidChange change)
}

/**
* Normalize deletion {@link AminoAcidChange} for amino acid string
* <p>
* Return <code>change</code> if it is not a clean deletion.
* This shifts the amino acid and its position to be reported in the <code>proteinChange</code> HGVS annotation to the
* first position where the amino acids actually differ. This is necessary because
* {@link AminoAcidChangeNormalizer#truncateBothSides(AminoAcidChange)} does not suffice in all situations.
*
* @param ref reference amino acid string to change
* @param change the {@link AminoAcidChange} to normalize
* @return normalized AminoAcidChange
* @param change the original amino acid change to be shifted
* @param wtAASeq the wildtype amino acid sequence (i.e. the translated reference sequence)
* @param varAASeq the predicted amino acid sequence induced by the variant
* @return the amino acid change shifted to the first difference in the sequence, or the end of the ref/alt CDS
*/
public static AminoAcidChange normalizeDeletion(String ref, AminoAcidChange change) {
if (change.getRef().length() == 0 || change.getAlt().length() != 0)
return change;

// Compute shift of deletion.
int shift = 0;
final int LEN = change.getRef().length();
while (change.getPos() + LEN + shift < ref.length()
&& ref.charAt(change.getPos()) == ref.charAt(change.getPos() + LEN + shift))
shift += 1;
if (shift == 0)
return change;

// Build new AminoAcidChange.
StringBuilder changeRefBuilder = new StringBuilder();
changeRefBuilder.append(ref.substring(change.getPos() + shift, change.getPos() + shift + change.getRef().length()));
return new AminoAcidChange(change.getPos() + shift, changeRefBuilder.toString(), "");
public static AminoAcidChange shiftSynonymousChange(AminoAcidChange change, String wtAASeq, String varAASeq) {
    // Strip matching ref/alt prefixes and suffixes before looking for the first real difference.
    final AminoAcidChange truncated = truncateBothSides(change);
    final int startPos = truncated.getPos();

    // Highest position at which both the ref and the alt substring still fit into their sequences.
    final int limit = Math.min(
        wtAASeq.length() - truncated.getRef().length(),
        varAASeq.length() - truncated.getAlt().length());

    // Walk right while wildtype and variant agree; never step past a wildtype stop codon.
    int shiftedPos = startPos;
    while (shiftedPos < limit) {
        final char wtAA = wtAASeq.charAt(shiftedPos);
        if (wtAA == '*' || wtAA != varAASeq.charAt(shiftedPos)) {
            break;
        }
        shiftedPos++;
    }

    // Nothing moved: hand back the truncated change as is.
    if (shiftedPos == startPos) {
        return truncated;
    }

    // Re-read ref and alt (same lengths as before) from the sequences at the shifted position.
    final String shiftedRef = wtAASeq.substring(shiftedPos, shiftedPos + truncated.getRef().length());
    final String shiftedAlt = varAASeq.substring(shiftedPos, shiftedPos + truncated.getAlt().length());
    return new AminoAcidChange(shiftedPos, shiftedRef, shiftedAlt);
}

/**
* Truncate {@link AminoAcidChange} from both sides for matching ref/alt prefixes/suffixes.
* <p>
* Truncating of the prefixes is given higher priority to conform with the HGVS notation (you have to call
* {@link #shiftInsertion}) afterwards.
* {@link #shiftSynonymousChange}) afterwards.
*
* @param aaChange {@link AminoAcidChange} to truncate
* @return updated {@link AminoAcidChange}
*/
public static AminoAcidChange truncateBothSides(AminoAcidChange aaChange) {
// TODO(holtgrem): Test me!
private static AminoAcidChange truncateBothSides(AminoAcidChange aaChange) {

// Truncate suffixes / from the right.
final int REFLEN = aaChange.getRef().length() - 1;
Expand All @@ -84,38 +87,4 @@ public static AminoAcidChange truncateBothSides(AminoAcidChange aaChange) {
return aaChange;
}

/**
 * Move a pure insertion to the right within the wildtype amino acid sequence.
 * <p>
 * The inserted residues are spliced into <code>wtAASeq</code> at the change position; the position is then
 * advanced for as long as the residue at the current position equals the residue one insertion-length further
 * right. A change whose <code>ref</code> is not empty is returned untouched, as is an insertion that cannot be
 * moved.
 *
 * @param aaChange {@link AminoAcidChange} to shift
 * @param wtAASeq wildtype amino acid sequence the insertion is applied to
 * @return the original <code>aaChange</code> if nothing was shifted, otherwise a new insertion at the shifted
 *         position
 */
public static AminoAcidChange shiftInsertion(AminoAcidChange aaChange, String wtAASeq) {
    // TODO(holtgrem): Test me!
    if (aaChange.getRef().length() != 0) {
        return aaChange;
    }

    // Build the sequence that results from applying the insertion to the wildtype.
    final StringBuilder withInsertion = new StringBuilder(wtAASeq);
    withInsertion.insert(aaChange.getPos(), aaChange.getAlt());

    final int startPos = aaChange.getPos();
    final int insertLength = aaChange.getAlt().length();
    final String mutated = withInsertion.toString();

    // Slide the insertion window right while the residue leaving on the left equals the one entering on the right.
    int shiftedPos = startPos;
    while (shiftedPos + insertLength < mutated.length()
        && mutated.charAt(shiftedPos) == mutated.charAt(shiftedPos + insertLength)) {
        shiftedPos++;
    }

    // Only build a new object if the insertion actually moved.
    if (shiftedPos == startPos) {
        return aaChange;
    }
    return new AminoAcidChange(shiftedPos, "", mutated.substring(shiftedPos, shiftedPos + insertLength));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,20 @@ public void testForwardFrameShiftDeletion() throws InvalidGenomeVariant {
Assert.assertEquals(ImmutableSortedSet.of(VariantEffect.FRAMESHIFT_TRUNCATION), annotation1.getEffects());
}

@Test
public void testForwardInternalFrameShiftNormalization() throws InvalidGenomeVariant {
    // The deleted stretch starts with a codon, but removing these ten nucleotides still shifts the
    // nucleotide sequence.
    final GenomePosition deletionStart = new GenomePosition(refDict, Strand.FWD, 1, 6645991,
        PositionType.ZERO_BASED);
    final GenomeVariant deletion = new GenomeVariant(deletionStart, "GAGAAACCCT", "");
    final DeletionAnnotationBuilder builder = new DeletionAnnotationBuilder(infoForward, deletion,
        new AnnotationBuilderOptions());
    final Annotation annotation = builder.build();

    Assert.assertEquals(infoForward.getAccession(), annotation.getTranscript().getAccession());
    Assert.assertEquals(3, annotation.getAnnoLoc().getRank());
    Assert.assertEquals("946_955del", annotation.getCDSNTChange().toHGVSString());
    Assert.assertEquals("(Glu316Leufs*25)", annotation.getProteinChange().toHGVSString(AminoAcidCode.THREE_LETTER));
    Assert.assertEquals(ImmutableSortedSet.of(VariantEffect.FRAMESHIFT_TRUNCATION), annotation.getEffects());
}

@Test
public void testForwardNonFrameShiftDeletion() throws InvalidGenomeVariant {
// clean (FS of begin position is 0) deletion of one codon, starting in intron (thus no "exon3" annotation is
Expand Down Expand Up @@ -1207,7 +1221,7 @@ public void testRealWorldCase_uc011mcs_() throws InvalidGenomeVariant {
Assert.assertEquals(infoForward.getAccession(), annotation1.getTranscript().getAccession());
Assert.assertEquals(12, annotation1.getAnnoLoc().getRank());
Assert.assertEquals("1068_1071del", annotation1.getCDSNTChange().toHGVSString());
Assert.assertEquals("(Glu358del)", annotation1.getProteinChange().toHGVSString(AminoAcidCode.THREE_LETTER));
Assert.assertEquals("(Glu357Lysfs*?)", annotation1.getProteinChange().toHGVSString(AminoAcidCode.THREE_LETTER));
Assert.assertEquals(ImmutableSortedSet.of(VariantEffect.FRAMESHIFT_TRUNCATION), annotation1.getEffects());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,42 +13,84 @@ public class AminoAcidChangeTest {

@Before
public void setUp() throws Exception {
this.ref = "AAACCCAAACCC";
this.ref2 = "CATCATCTTCA";
this.ref = "LLLCCCLLLCCC";
this.ref2 = "CLTCLTCTTCL";
}

@Test
public void testShiftNoRefChange() {
AminoAcidChange origChange = new AminoAcidChange(3, "CC", "");
AminoAcidChange modChange = AminoAcidChangeNormalizer.normalizeDeletion(ref, origChange);
AminoAcidChange modChange = AminoAcidChangeNormalizer.shiftSynonymousChange(origChange, ref, "LLLCLLLCCC");
Assert.assertEquals(new AminoAcidChange(4, "CC", ""), modChange);
}

@Test
public void testShiftNoRefChangeEnd() {
AminoAcidChange origChange = new AminoAcidChange(9, "CC", "");
AminoAcidChange modChange = AminoAcidChangeNormalizer.normalizeDeletion(ref, origChange);
AminoAcidChange modChange = AminoAcidChangeNormalizer.shiftSynonymousChange(origChange, ref, "LLLCCCLLLC");
Assert.assertEquals(new AminoAcidChange(10, "CC", ""), modChange);
}

@Test
public void testShiftRefChange() {
AminoAcidChange origChange = new AminoAcidChange(3, "CAT", "");
AminoAcidChange modChange = AminoAcidChangeNormalizer.normalizeDeletion(ref2, origChange);
Assert.assertEquals(new AminoAcidChange(4, "ATC", ""), modChange);
AminoAcidChange origChange = new AminoAcidChange(3, "CLT", "");
AminoAcidChange modChange = AminoAcidChangeNormalizer.shiftSynonymousChange(origChange, ref2, "CLTCTTCL");
Assert.assertEquals(new AminoAcidChange(4, "LTC", ""), modChange);
}

@Test
public void testShiftDeletionFrameshiftNoInsAA() {
    // Wildtype and variant agree up to index 5 and first differ at index 6, where the deletion is expected.
    final String wildtype = "LLLCQYCLLL";
    final String variant = "LLLCQYVA";
    final AminoAcidChange input = new AminoAcidChange(3, "CQY", "");
    final AminoAcidChange expected = new AminoAcidChange(6, "CLL", "");
    Assert.assertEquals(expected, AminoAcidChangeNormalizer.shiftSynonymousChange(input, wildtype, variant));
}

@Test
public void testShiftDeletionFrameshiftInsAA() {
    // Ref and alt start with the same residue; the expected result is a two-residue deletion at index 6.
    final String wildtype = "LLLCQYCLLL";
    final String variant = "LLLCQYVA";
    final AminoAcidChange input = new AminoAcidChange(3, "CQY", "C");
    final AminoAcidChange expected = new AminoAcidChange(6, "CL", "");
    Assert.assertEquals(expected, AminoAcidChangeNormalizer.shiftSynonymousChange(input, wildtype, variant));
}

@Test
public void testShiftInsertionNoFrameshift() {
    // The variant carries an extra "QY"; the sequences first differ at index 6, where the insertion is expected.
    final String wildtype = "LLLCQYCLLL";
    final String variant = "LLLCQYQYCLLL";
    final AminoAcidChange input = new AminoAcidChange(3, "C", "CQY");
    final AminoAcidChange expected = new AminoAcidChange(6, "", "QY");
    Assert.assertEquals(expected, AminoAcidChangeNormalizer.shiftSynonymousChange(input, wildtype, variant));
}

@Test
public void testShiftInsertionFrameshift() {
    // Same insertion as the non-frameshift case, but the variant tail no longer matches the wildtype.
    final String wildtype = "LLLCQYCLLL";
    final String variant = "LLLCQYQYVAAA";
    final AminoAcidChange input = new AminoAcidChange(3, "C", "CQY");
    final AminoAcidChange expected = new AminoAcidChange(6, "", "QY");
    Assert.assertEquals(expected, AminoAcidChangeNormalizer.shiftSynonymousChange(input, wildtype, variant));
}

@Test
public void testShiftSubstitutionNoFrameshift() {
    // Ref and alt differ only in their third residue; the expected change is that single substitution at index 5.
    final String wildtype = "LLLCQYCLLL";
    final String variant = "LLLCQLCLLL";
    final AminoAcidChange input = new AminoAcidChange(3, "CQYC", "CQLC");
    final AminoAcidChange expected = new AminoAcidChange(5, "Y", "L");
    Assert.assertEquals(expected, AminoAcidChangeNormalizer.shiftSynonymousChange(input, wildtype, variant));
}

@Test
public void testShiftSubstitutionFrameshift() {
    // Ref and alt share their first three residues; the expected change is the differing residue at index 6.
    final String wildtype = "LLLCQYCLLL";
    final String variant = "LLLCQYQYVAAA";
    final AminoAcidChange input = new AminoAcidChange(3, "CQYC", "CQYQ");
    final AminoAcidChange expected = new AminoAcidChange(6, "C", "Q");
    Assert.assertEquals(expected, AminoAcidChangeNormalizer.shiftSynonymousChange(input, wildtype, variant));
}

@Test
public void testNoShift() {
AminoAcidChange origChange = new AminoAcidChange(3, "CCC", "");
AminoAcidChange modChange = AminoAcidChangeNormalizer.normalizeDeletion(ref, origChange);
AminoAcidChange modChange = AminoAcidChangeNormalizer.shiftSynonymousChange(origChange, ref, "LLLLLLCCC");
Assert.assertEquals(new AminoAcidChange(3, "CCC", ""), modChange);
}

@Test
public void testNoShiftEnd() {
AminoAcidChange origChange = new AminoAcidChange(9, "CCC", "");
AminoAcidChange modChange = AminoAcidChangeNormalizer.normalizeDeletion(ref, origChange);
AminoAcidChange modChange = AminoAcidChangeNormalizer.shiftSynonymousChange(origChange, ref, "LLLCCCLLL");
Assert.assertEquals(new AminoAcidChange(9, "CCC", ""), modChange);
}

Expand Down