Skip to content

Commit

Permalink
Add fix for excessively high-scoring symbolic INS variants.
Browse files Browse the repository at this point in the history
Update JannovarStructuralVariantAnnotator to remove INSERTION, INVERSION and STRUCTURAL_VARIANT annotations from Jannovar output.
Update VariantEvaluation.getPathogenicityScore to partially implement SvAnna scoring scheme.
  • Loading branch information
julesjacobsen committed Jun 5, 2023
1 parent 0bff4ab commit 4135cbd
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,21 @@
import de.charite.compbio.jannovar.data.JannovarData;
import de.charite.compbio.jannovar.reference.GenomeInterval;
import de.charite.compbio.jannovar.reference.SVGenomeVariant;
import de.charite.compbio.jannovar.reference.Strand;
import de.charite.compbio.jannovar.reference.TranscriptModel;
import org.monarchinitiative.exomiser.core.model.ChromosomalRegionIndex;
import org.monarchinitiative.exomiser.core.model.RegulatoryFeature;
import org.monarchinitiative.exomiser.core.model.TranscriptAnnotation;
import org.monarchinitiative.exomiser.core.model.VariantAnnotation;
import org.monarchinitiative.svart.CoordinateSystem;
import org.monarchinitiative.svart.GenomicRegion;
import org.monarchinitiative.svart.Variant;
import org.monarchinitiative.svart.VariantType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;

import static de.charite.compbio.jannovar.annotation.VariantEffect.*;
import static java.util.stream.Collectors.groupingBy;
Expand All @@ -54,6 +55,8 @@ class JannovarStructuralVariantAnnotator implements VariantAnnotator {
private static final Logger logger = LoggerFactory.getLogger(JannovarStructuralVariantAnnotator.class);
private static final VariantAnnotation EMPTY_STRUCTURAL_ANNOTATION = VariantAnnotation.of("", "", STRUCTURAL_VARIANT, List.of());

private static final VariantEffect DEFAULT_EFFECT = SEQUENCE_VARIANT;

private final GenomeAssembly genomeAssembly;
private final JannovarVariantConverter jannovarVariantConverter;
private final JannovarAnnotationService jannovarAnnotationService;
Expand Down Expand Up @@ -101,8 +104,17 @@ private List<VariantAnnotation> buildVariantAnnotations(SVAnnotations svAnnotati

private VariantAnnotation toStructuralVariantAnnotation(Variant variant, List<SVAnnotation> svAnnotations) {
svAnnotations.sort(SVAnnotation::compareTo);
SVAnnotation highestImpactAnnotation = svAnnotations.isEmpty() ? null : svAnnotations.get(0);
//Attention! highestImpactAnnotation can be null
@Nullable SVAnnotation highestImpactAnnotation = svAnnotations.isEmpty() ? null : svAnnotations.get(0);
// CAUTION! highestImpactAnnotation can be null

// Jannovar reports INSERTION & INVERSION as a HIGH impact, as the SO does not have terms to describe structural
// variants within a coding sequence, so this loses the context of where the insertion was seen.
// This is a potentially deleterious insertion:
// [INSERTION, STRUCTURAL_VARIANT, CODING_SEQUENCE_VARIANT, CODING_TRANSCRIPT_VARIANT] -> CODING_SEQUENCE_VARIANT
// This is a downstream, non-coding insertion so is probably uninteresting
// [INSERTION, DOWNSTREAM_GENE_VARIANT, STRUCTURAL_VARIANT, CODING_TRANSCRIPT_VARIANT] -> DOWNSTREAM_GENE_VARIANT
// TODO: This would be an ideal place to calculate a variantEffectScore (instead of in the VariantEffectPathogenicityScore)
// based on the SvAnna scoring system https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-022-01046-6/tables/1
VariantEffect highestImpactEffect = getHighestImpactEffect(highestImpactAnnotation);
String geneSymbol = buildGeneSymbol(highestImpactAnnotation);
String geneId = buildGeneId(highestImpactAnnotation);
Expand All @@ -114,8 +126,15 @@ private VariantAnnotation toStructuralVariantAnnotation(Variant variant, List<SV
}

private VariantEffect getHighestImpactEffect(@Nullable SVAnnotation highestImpactAnnotation) {
return (highestImpactAnnotation == null || highestImpactAnnotation.getMostPathogenicVariantEffect() == null) ? VariantEffect.STRUCTURAL_VARIANT : highestImpactAnnotation
.getMostPathogenicVariantEffect();
return (highestImpactAnnotation == null || highestImpactAnnotation.getMostPathogenicVariantEffect() == null) ? DEFAULT_EFFECT :
filterEffects(highestImpactAnnotation.getEffects());
}

private static VariantEffect filterEffects(Set<VariantEffect> variantEffects) {
return variantEffects.stream()
.filter(vaEff -> !(vaEff == INSERTION || vaEff == INVERSION || vaEff == STRUCTURAL_VARIANT))
.findFirst()
.orElse(DEFAULT_EFFECT);
}

private List<TranscriptAnnotation> buildSvTranscriptAnnotations(List<SVAnnotation> svAnnotations) {
Expand All @@ -128,7 +147,7 @@ private List<TranscriptAnnotation> buildSvTranscriptAnnotations(List<SVAnnotatio

private TranscriptAnnotation toTranscriptAnnotation(SVAnnotation svAnnotation) {
return TranscriptAnnotation.builder()
.variantEffect(getVariantEffectOrDefault(svAnnotation.getMostPathogenicVariantEffect(), VariantEffect.STRUCTURAL_VARIANT))
.variantEffect(getVariantEffectOrDefault(svAnnotation.getMostPathogenicVariantEffect(), DEFAULT_EFFECT))
.accession(TranscriptModelUtil.getTranscriptAccession(svAnnotation.getTranscript()))
.geneSymbol(buildGeneSymbol(svAnnotation))
.distanceFromNearestGene(getDistFromNearestGene(svAnnotation))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,7 @@
import org.monarchinitiative.exomiser.core.model.frequency.FrequencyData;
import org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicityData;
import org.monarchinitiative.exomiser.core.model.pathogenicity.VariantEffectPathogenicityScore;
import org.monarchinitiative.svart.Contig;
import org.monarchinitiative.svart.CoordinateSystem;
import org.monarchinitiative.svart.Position;
import org.monarchinitiative.svart.Strand;
import org.monarchinitiative.svart.*;

import java.util.*;

Expand Down Expand Up @@ -378,6 +375,31 @@ public float getPathogenicityScore() {
}
float predictedScore = pathogenicityData.getScore();
float variantEffectScore = VariantEffectPathogenicityScore.getPathogenicityScoreOf(variantEffect);
if (this.isSymbolic()) {
// SvAnna scoring https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-022-01046-6/tables/1
// | element contains v
// class | v contains t | v overlaps t | Coding or splice | UTR | Intronic | Promoter
// -----|--------------|---------------|------------------|-------|----------|---------
// DEL | 1 | 1 | {0.8, 1} | 0≤(g)≤1 | 0 | 0.4
// DUP | 1 | 0 | {0.8, 1} | 0≤(g)≤1 | 0 | 0.4
// INV | 0 | 1 | 1 | 0≤(g)≤1 | 0 | 0.4
// INS | - | - | {0.2, 0.9} | 0≤(g)≤1 | 0 | 0.4

// for UTR score = min((2* variant.length()/utr.length()), 1)
// unfortunately, we don't have the transcript data anymore by the time we get here so this needs to be
// pushed-down into the VariantAnnotator.
// CODING_SEQUENCE_VARIANT is usually only a MODIFIER type but is used by Jannovar to indicate an SV
// overlapping the CDS, so we need to up the weight here.
if (variantEffect == VariantEffect.CODING_SEQUENCE_VARIANT) {
// For INS might also be worth using the min((2* variant.length()/cds.length()), 1) score too?
variantEffectScore = this.variantType().baseType() == VariantType.INS ? 0.2f : 0.8f;
} else if (variantEffect.isSplicing()) {
variantEffectScore = this.variantType().baseType() == VariantType.INS ? 0.9f : 1.0f;
}
// do not apply any scoring to INV as Jannovar uses this as a blanket annotation for any overlap of the
// transcript, even if it only occurs in an intron or UTR, so we can't give this an outright score of 1.
// This is the reason the variantEffectScore was downgraded from 1 to 0.6.
}
if (variantEffect == VariantEffect.MISSENSE_VARIANT) {
// CAUTION! REVEL scores tend to be more nuanced and frequently lower thant either the default variant effect score
// or the other predicted path scores, yet apparently are more concordant with ClinVar. For this reason it might be
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ private Variant variant(Contig contig, int start, int end, String ref, String al
}

@Test
public void exonicInsertion() {
public void downstreamInsertion() {
Variant variant = variant(chr10, 123237843, 123237843, "T", "<INS>", 200);
List<VariantAnnotation> annotations = instance.annotate(variant);

Expand All @@ -77,7 +77,26 @@ public void exonicInsertion() {

assertThat(variantAnnotation.getGeneId(), equalTo("2263"));
assertThat(variantAnnotation.getGeneSymbol(), equalTo("FGFR2"));
assertThat(variantAnnotation.getVariantEffect(), equalTo(VariantEffect.INSERTION));
assertThat(variantAnnotation.getVariantEffect(), equalTo(VariantEffect.DOWNSTREAM_GENE_VARIANT));
}

@Test
public void exonicInsertion() {
Variant variant = variant(GenomeAssembly.HG19.getContigById(1), 145508025, 145508025, "T", "<INS>", 200);
List<VariantAnnotation> annotations = instance.annotate(variant);
assertThat(annotations.size(), equalTo(2));

for (VariantAnnotation variantAnnotation : annotations) {
assertThat(variantAnnotation.hasTranscriptAnnotations(), is(true));
// Jannovar would add an artificially HIGH impact INSERTION annotation to all non-intergenic insertions which
// leads to hugely over-inflated variant effect scores.
if (variantAnnotation.getGeneSymbol().equals("RBM8A")) {
assertThat(variantAnnotation.getVariantEffect(), equalTo(VariantEffect.CODING_SEQUENCE_VARIANT));
}
if (variantAnnotation.getGeneSymbol().equals("GNRHR2")) {
assertThat(variantAnnotation.getVariantEffect(), equalTo(VariantEffect.DOWNSTREAM_GENE_VARIANT));
}
}
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,4 +340,16 @@ public void testMassivePreciseVariantIsTreatedAsStructural() {
assertThat(variantAnnotations.size(), equalTo(1));
assertThat(variantAnnotations.get(0).getVariantEffect(), equalTo(VariantEffect.EXON_LOSS_VARIANT));
}

@Test
public void testMassivePreciseDownstreamGeneInsertionVariantIsTreatedAsStructural() {
VariantContext variantContext = TestVcfReader.forSamples("sample").readVariantContext("10 123361399 . C CCAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGGTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGAGCCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGACAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGCTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGCTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGCTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGGTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGCCAGGCGGGGTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGCTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGCCAGGCGGGGTGAGGGTCAGAGGAAGGGCCAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGCCAGGCGGGGTGAGGGTCAGAGGAAGGGACAGGCGGGGTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCAGAGGAAGGGACAGGCGGGGTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCAGAGGAAGGGACAGGCGGGGTGAGGGTCAGAGGAAGGGGCAGGCGGGCTGAGGGTCAGAGGAAGGGCCAGGCGGGGTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCACAGGAAGGGACAGGCGGGGTGAGGGTCAGAGGAAGGGCCAGGCGGGCTGAGGGTCA . PASS . GT 1/1");
GenomeAssembly hg19 = GenomeAssembly.HG19;
VariantContextConverter variantContextConverter = VariantContextConverter.of(hg19.genomicAssembly(), VariantTrimmer.leftShiftingTrimmer(VariantTrimmer.retainingCommonBase()));
Variant variant = variantContextConverter.convertToVariant(variantContext, variantContext.getAlternateAllele(0));
List<VariantAnnotation> variantAnnotations = instance.annotate(variant);

assertThat(variantAnnotations.size(), equalTo(1));
assertThat(variantAnnotations.get(0).getVariantEffect(), equalTo(VariantEffect.UPSTREAM_GENE_VARIANT));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,28 @@ public void testGetPathogenicityScoreMissensePredictedScoreLessThanDefault() {
assertThat(instance.getPathogenicityScore(), equalTo(expected));
}

@ParameterizedTest
@CsvSource({
"<INS>, CODING_SEQUENCE_VARIANT, 0.2",
"<INS:ME>, CODING_SEQUENCE_VARIANT, 0.2",
"<INV>, CODING_SEQUENCE_VARIANT, 0.8", // should be unreachable in production code
"<DEL>, CODING_SEQUENCE_VARIANT, 0.8",
"<DEL:ME>, CODING_SEQUENCE_VARIANT, 0.8",
"<DUP>, CODING_SEQUENCE_VARIANT, 0.8",
"<INS>, SPLICE_REGION_VARIANT, 0.9",
"<INS:ME>, SPLICE_REGION_VARIANT, 0.9",
"<INV>, SPLICE_REGION_VARIANT, 1.0", // should be unreachable in production code
"<DEL>, SPLICE_REGION_VARIANT, 1.0",
"<DEL:ME>, SPLICE_REGION_VARIANT, 1.0",
"<DUP>, SPLICE_REGION_VARIANT, 1.0",
})
void testSymbolicInsertionScores(String alt, VariantEffect variantEffect, float expected) {
VariantEvaluation sv = newBuilder(2, 1, 1, "C", alt, alt.startsWith("<DEL") ? -12345 : 12345)
.variantEffect(variantEffect)
.build();
assertThat(sv.getPathogenicityScore(), equalTo(expected));
}

@Test
public void testGetFailedFilterTypes() {
Set<FilterType> expectedFilters = EnumSet.of(FAIL_FREQUENCY_RESULT.getFilterType());
Expand Down

0 comments on commit 4135cbd

Please sign in to comment.