Skip to content

Commit

Permalink
fix bugs in ChimericAlignment
Browse files Browse the repository at this point in the history
  • Loading branch information
SHuang-Broad committed Oct 12, 2017
1 parent 64bacc4 commit 1cd756b
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@
import org.broadinstitute.hellbender.utils.SimpleInterval;
import scala.Tuple2;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.*;

import static org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD;

Expand Down Expand Up @@ -104,8 +101,8 @@ public ChimericAlignment(final AlignmentInterval intervalWithLowerCoordOnContig,

this.strandSwitch = determineStrandSwitch(intervalWithLowerCoordOnContig, intervalWithHigherCoordOnContig);

this.isForwardStrandRepresentation = isForwardStrandRepresentation(intervalWithLowerCoordOnContig, intervalWithHigherCoordOnContig,
this.strandSwitch, involvesRefPositionSwitch(intervalWithLowerCoordOnContig, intervalWithHigherCoordOnContig));
this.isForwardStrandRepresentation =
isForwardStrandRepresentation(intervalWithLowerCoordOnContig, intervalWithHigherCoordOnContig, strandSwitch);

this.insertionMappings = insertionMappings;
}
Expand Down Expand Up @@ -210,59 +207,54 @@ static StrandSwitch determineStrandSwitch(final AlignmentInterval first, final A
}

/**
* Determine if the region that maps to a lower coordinate on the contig also maps to a lower coordinate on the reference.
* An SV event could be detected from a contig that seem originate from the forward or reverse strand of the reference,
* besides the annotation that the alignment flanking regions might flank either side of the two breakpoints.
*/
@VisibleForTesting
static boolean involvesRefPositionSwitch(final AlignmentInterval regionWithLowerCoordOnContig,
final AlignmentInterval regionWithHigherCoordOnContig) {
static boolean isForwardStrandRepresentation(final AlignmentInterval regionWithLowerCoordOnContig,
final AlignmentInterval regionWithHigherCoordOnContig,
final StrandSwitch strandSwitch) {

return regionWithHigherCoordOnContig.referenceSpan.getStart() < regionWithLowerCoordOnContig.referenceSpan.getStart();
switch (strandSwitch) {
case NO_SWITCH: return regionWithLowerCoordOnContig.forwardStrand;
case FORWARD_TO_REVERSE: return regionWithLowerCoordOnContig.referenceSpan.getEnd() < regionWithHigherCoordOnContig.referenceSpan.getEnd();
case REVERSE_TO_FORWARD: return regionWithLowerCoordOnContig.referenceSpan.getStart() < regionWithHigherCoordOnContig.referenceSpan.getStart();
default: throw new IllegalArgumentException("Seeing unexpected strand switch case: " + strandSwitch.name());
}
}

// TODO: 12/15/16 simple translocations are defined here and at this time as inter-chromosomal ones and
// intra-chromosomal translocations that involve strand switch
// to get the 2nd case right, we need evidence flanking both sides of the inversion, and that could be difficult
// TODO: 12/15/16 simple translocations are defined here and at this time as
// 1) inter-chromosomal ones and
// 2) intra-chromosomal translocations that DOES NOT involve strand switch
// To get the 2nd case right, we need evidence flanking both sides of the inversion, and that could be difficult
// TODO: 1/17/17 this is used for filtering out possible translocations that we are not currently handling,
// but it overkills some insertions where the inserted sequence maps to chromosomes other than that of the flanking regions
/**
* Determine if the two regions indicate a inter-chromosomal translocation or intra-chromosomal translocation that
* DOES NOT involve a strand switch.
*/
@VisibleForTesting
public static boolean isNotSimpleTranslocation(final AlignmentInterval regionWithLowerCoordOnContig,
final AlignmentInterval regionWithHigherCoordOnContig,
final StrandSwitch strandSwitch,
final boolean involvesReferenceIntervalSwitch) {
// logic is: must be the same reference chromosome for it not to be an inter-chromosomal translocation
// and when regions are mapped to the same reference chromosome, there cannot be reference position swap
final boolean sameChromosome = regionWithLowerCoordOnContig.referenceSpan.getContig()
.equals(regionWithHigherCoordOnContig.referenceSpan.getContig());
return sameChromosome
&&
(strandSwitch!=StrandSwitch.NO_SWITCH
|| involvesReferenceIntervalSwitch == !regionWithLowerCoordOnContig.forwardStrand);
}

/**
* An SV event could be detected from a contig that seem originate from the forward or reverse strand of the reference,
* besides the annotation that the alignment flanking regions might flank either side of the two breakpoints.
*/
@VisibleForTesting
static boolean isForwardStrandRepresentation(final AlignmentInterval regionWithLowerCoordOnContig,
final AlignmentInterval regionWithHigherCoordOnContig,
final StrandSwitch strandSwitch,
final boolean involvesReferenceIntervalSwitch) {

if (strandSwitch == StrandSwitch.NO_SWITCH) {
return regionWithLowerCoordOnContig.forwardStrand && regionWithHigherCoordOnContig.forwardStrand;
static boolean isLikelySimpleTranslocation(final AlignmentInterval regionWithLowerCoordOnContig,
final AlignmentInterval regionWithHigherCoordOnContig,
final StrandSwitch strandSwitch) {

if (!regionWithLowerCoordOnContig.referenceSpan.getContig()
.equals(regionWithHigherCoordOnContig.referenceSpan.getContig()))
return true;

if (strandSwitch.equals(StrandSwitch.NO_SWITCH)) {
if (regionWithLowerCoordOnContig.forwardStrand) {
return regionWithLowerCoordOnContig.referenceSpan.getStart() > regionWithHigherCoordOnContig.referenceSpan.getStart();
} else {
return regionWithLowerCoordOnContig.referenceSpan.getEnd() < regionWithHigherCoordOnContig.referenceSpan.getEnd();
}
} else {
return !involvesReferenceIntervalSwitch;
return false;
}
}

public boolean isNotSimpleTranslocation() {
return isNotSimpleTranslocation(regionWithLowerCoordOnContig, regionWithHigherCoordOnContig, strandSwitch,
involvesRefPositionSwitch(regionWithLowerCoordOnContig, regionWithHigherCoordOnContig));
return !isLikelySimpleTranslocation(regionWithLowerCoordOnContig, regionWithHigherCoordOnContig, strandSwitch);
}

Tuple2<SimpleInterval, SimpleInterval> getCoordSortedReferenceSpans() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,23 +45,23 @@ public void testFilterByNextAlignmentMayBeNovelInsertion() {
public void testBooleanStatesAndSerialization_inversion() {

Tuple4<AlignmentInterval, AlignmentInterval, NovelAdjacencyReferenceLocations, String> testData = SVDiscoveryTestDataProvider.forSimpleInversionFromLongCtg1WithStrangeLeftBreakpoint;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.REVERSE_TO_FORWARD, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.REVERSE_TO_FORWARD, false, true);
testSerialization(testData._1(), testData._2());

testData = SVDiscoveryTestDataProvider.forSimpleInversionWithHom_leftPlus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.FORWARD_TO_REVERSE, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.FORWARD_TO_REVERSE, false, true);
testSerialization(testData._1(), testData._2());

testData = SVDiscoveryTestDataProvider.forSimpleInversionWithHom_leftMinus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.FORWARD_TO_REVERSE, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.FORWARD_TO_REVERSE, false, false);
testSerialization(testData._1(), testData._2());

testData = SVDiscoveryTestDataProvider.forSimpleInversionWithHom_rightPlus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.REVERSE_TO_FORWARD, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.REVERSE_TO_FORWARD, false, true);
testSerialization(testData._1(), testData._2());

testData = SVDiscoveryTestDataProvider.forSimpleInversionWithHom_rightMinus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.REVERSE_TO_FORWARD, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.REVERSE_TO_FORWARD, false, false);
testSerialization(testData._1(), testData._2());
}

Expand All @@ -70,34 +70,34 @@ public void testBooleanStatesAndSerialization_simpleInsertionAndDeletion() {

// simple deletion
Tuple4<AlignmentInterval, AlignmentInterval, NovelAdjacencyReferenceLocations, String> testData = SVDiscoveryTestDataProvider.forSimpleDeletion_plus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true);
testSerialization(testData._1(), testData._2());
testData = SVDiscoveryTestDataProvider.forSimpleDeletion_minus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, false);
testSerialization(testData._1(), testData._2());

// simple insertion
testData = SVDiscoveryTestDataProvider.forSimpleInsertion_plus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true);
testSerialization(testData._1(), testData._2());
testData = SVDiscoveryTestDataProvider.forSimpleInsertion_minus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, false);
testSerialization(testData._1(), testData._2());

// long range substitution
testData = SVDiscoveryTestDataProvider.forLongRangeSubstitution_plus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true);
testSerialization(testData._1(), testData._2());
testData = SVDiscoveryTestDataProvider.forLongRangeSubstitution_minus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, false);
testSerialization(testData._1(), testData._2());

// simple deletion with homology
testData = SVDiscoveryTestDataProvider.forDeletionWithHomology_plus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true);
testSerialization(testData._1(), testData._2());
testData = SVDiscoveryTestDataProvider.forDeletionWithHomology_minus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, false);
testSerialization(testData._1(), testData._2());
}

Expand All @@ -106,26 +106,26 @@ public void testBooleanStatesAndSerialization_tandemDuplication_simple() {

// tandem duplication simple contraction
Tuple4<AlignmentInterval, AlignmentInterval, NovelAdjacencyReferenceLocations, String> testData = SVDiscoveryTestDataProvider.forSimpleTanDupContraction_plus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true);
testSerialization(testData._1(), testData._2());
testData = SVDiscoveryTestDataProvider.forSimpleTanDupContraction_minus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, false);
testSerialization(testData._1(), testData._2());

// tandem duplication simple expansion
testData = SVDiscoveryTestDataProvider.forSimpleTanDupExpansion_plus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true);
testSerialization(testData._1(), testData._2());
testData = SVDiscoveryTestDataProvider.forSimpleTanDupExpansion_minus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, false);
testSerialization(testData._1(), testData._2());

// tandem duplication simple expansion with novel insertion
testData = SVDiscoveryTestDataProvider.forSimpleTanDupExpansionWithNovelIns_plus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true);
testSerialization(testData._1(), testData._2());
testData = SVDiscoveryTestDataProvider.forSimpleTanDupExpansionWithNovelIns_minus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, false);
testSerialization(testData._1(), testData._2());
}

Expand All @@ -134,47 +134,45 @@ public void testBooleanStatesAndSerialization_tandemDuplication_complex() {

// first test (the original observed event, but assigned to a different chromosome): expansion from 1 unit to 2 units with pseudo-homology
Tuple4<AlignmentInterval, AlignmentInterval, NovelAdjacencyReferenceLocations, String> testData = SVDiscoveryTestDataProvider.forComplexTanDup_1to2_pseudoHom_plus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true);
testSerialization(testData._1(), testData._2());
testData = SVDiscoveryTestDataProvider.forComplexTanDup_1to2_pseudoHom_minus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, false);
testSerialization(testData._1(), testData._2());

// second test: contraction from 2 units to 1 unit with pseudo-homology
testData = SVDiscoveryTestDataProvider.forComplexTanDup_2to1_pseudoHom_plus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true);
testSerialization(testData._1(), testData._2());
testData = SVDiscoveryTestDataProvider.forComplexTanDup_2to1_pseudoHom_minus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, false);
testSerialization(testData._1(), testData._2());

// third test: contraction from 3 units to 2 units without pseudo-homology
testData = SVDiscoveryTestDataProvider.forComplexTanDup_3to2_noPseudoHom_plus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true);
testSerialization(testData._1(), testData._2());
testData = SVDiscoveryTestDataProvider.forComplexTanDup_3to2_noPseudoHom_minus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, false);
testSerialization(testData._1(), testData._2());

// fourth test: expansion from 2 units to 3 units without pseudo-homology
testData = SVDiscoveryTestDataProvider.forComplexTanDup_2to3_noPseudoHom_plus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true, true);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, true);
testSerialization(testData._1(), testData._2());
testData = SVDiscoveryTestDataProvider.forComplexTanDup_2to3_noPseudoHom_minus;
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, true, true, false);
testBooleanSeries(testData._1(), testData._2(), StrandSwitch.NO_SWITCH, false, false);
testSerialization(testData._1(), testData._2());
}

private static void testBooleanSeries(final AlignmentInterval region1, final AlignmentInterval region2,
final StrandSwitch expectedStrandSwitch,
final boolean expectedRefPositionSwitch,
final boolean expectedIsNotSimpleTranslocation,
final boolean expectedIsLikelySimpleTranslocation,
final boolean expectedIsForwardStrandRepresentation) {

Assert.assertEquals(ChimericAlignment.determineStrandSwitch(region1, region2), expectedStrandSwitch);
Assert.assertEquals(ChimericAlignment.involvesRefPositionSwitch(region1, region2), expectedRefPositionSwitch);
Assert.assertEquals(ChimericAlignment.isForwardStrandRepresentation(region1, region2, expectedStrandSwitch, expectedRefPositionSwitch), expectedIsForwardStrandRepresentation);
Assert.assertEquals(ChimericAlignment.isNotSimpleTranslocation(region1, region2, expectedStrandSwitch, expectedRefPositionSwitch), expectedIsNotSimpleTranslocation);
Assert.assertEquals(ChimericAlignment.isForwardStrandRepresentation(region1, region2, expectedStrandSwitch), expectedIsForwardStrandRepresentation);
Assert.assertEquals(ChimericAlignment.isLikelySimpleTranslocation(region1, region2, expectedStrandSwitch), expectedIsLikelySimpleTranslocation);
}

private static void testSerialization(final AlignmentInterval region1, final AlignmentInterval region2) {
Expand Down

0 comments on commit 1cd756b

Please sign in to comment.