Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HOLD: Bring in more duplication expansion calls #3668

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ private static Map<String, Object> parseComplicationsAndMakeThemAttributeMap(fin
}

if (breakpointComplications.getDupSeqStrandOnCtg() != null) {
attributeMap.put(GATKSVVCFConstants.DUP_INV_ORIENTATIONS,
attributeMap.put(GATKSVVCFConstants.DUP_ORIENTATIONS,
breakpointComplications.getDupSeqStrandOnCtg().stream().map(Strand::toString).collect(Collectors.joining()));
}
}
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@
import org.broadinstitute.hellbender.utils.SimpleInterval;
import scala.Tuple2;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.*;

import static org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD;

Expand Down Expand Up @@ -104,9 +101,8 @@ public ChimericAlignment(final AlignmentInterval intervalWithLowerCoordOnContig,

this.strandSwitch = determineStrandSwitch(intervalWithLowerCoordOnContig, intervalWithHigherCoordOnContig);

final boolean involvesRefIntervalSwitch = involvesRefPositionSwitch(intervalWithLowerCoordOnContig, intervalWithHigherCoordOnContig);
this.isForwardStrandRepresentation = isForwardStrandRepresentation(intervalWithLowerCoordOnContig, intervalWithHigherCoordOnContig,
this.strandSwitch, involvesRefIntervalSwitch);
this.isForwardStrandRepresentation =
isForwardStrandRepresentation(intervalWithLowerCoordOnContig, intervalWithHigherCoordOnContig, strandSwitch);

this.insertionMappings = insertionMappings;
}
Expand All @@ -119,13 +115,16 @@ public ChimericAlignment(final AlignmentInterval intervalWithLowerCoordOnContig,
* 2) if the alignment region is too small, it is skipped
* If the alignment region passes the above two filters and the next alignment region could be treated as potential inserted sequence,
* note down the mapping & alignment information of that region and skip it
* @param alignedContig made of (sorted {alignmentIntervals}, sequence) of a potentially-signalling locally-assembled contig
* @param uniqueRefSpanThreshold for an alignment interval to be used to construct a ChimericAlignment,
* how long a unique--i.e. the same ref span is not covered by other alignment intervals--alignment on the reference must it have
* @param alignedContig made of (sorted {alignmentIntervals}, sequence) of a potentially-signalling locally-assembled contig
* @param uniqueRefSpanThreshold for an alignment interval to be used to construct a ChimericAlignment,
* @param filterAlignmentByMqOrLength whether to perform filtering based on MQ for first several alignments and based alignment length for all alignments
* @param filterWhollyContainedAlignments whether to perform filtering on alignments whose ref span is contained in others'
*/
@VisibleForTesting
public static List<ChimericAlignment> parseOneContig(final AlignedContig alignedContig,
final int uniqueRefSpanThreshold) {
final int uniqueRefSpanThreshold,
final boolean filterAlignmentByMqOrLength,
final boolean filterWhollyContainedAlignments) {

if (alignedContig.alignmentIntervals.size() < 2) {
return new ArrayList<>();
Expand All @@ -135,8 +134,10 @@ public static List<ChimericAlignment> parseOneContig(final AlignedContig aligned

// fast forward to the first alignment region with high MapQ
AlignmentInterval current = iterator.next();
while (mapQualTooLow(current) && iterator.hasNext()) {
current = iterator.next();
if (filterAlignmentByMqOrLength) {
while (mapQualTooLow(current, CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD) && iterator.hasNext()) {
current = iterator.next();
}
}

final List<ChimericAlignment> results = new ArrayList<>(alignedContig.alignmentIntervals.size() - 1);
Expand All @@ -145,9 +146,9 @@ public static List<ChimericAlignment> parseOneContig(final AlignedContig aligned
// then iterate over the AR's in pair to identify CA's.
while ( iterator.hasNext() ) {
final AlignmentInterval next = iterator.next();
if (firstAlignmentIsTooShort(current, next, uniqueRefSpanThreshold)) {
if (filterAlignmentByMqOrLength && firstAlignmentIsTooShort(current, next, uniqueRefSpanThreshold)) {
continue;
} else if (nextAlignmentMayBeNovelInsertion(current, next, uniqueRefSpanThreshold)) {
} else if (nextAlignmentMayBeInsertion(current, next, uniqueRefSpanThreshold, CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD, filterWhollyContainedAlignments)) {
if (iterator.hasNext()) {
insertionMappings.add(next.toPackedString());
continue;
Expand All @@ -156,10 +157,10 @@ public static List<ChimericAlignment> parseOneContig(final AlignedContig aligned
}
}

final boolean isNotSimpleTranslocation = isNotSimpleTranslocation(current, next,
determineStrandSwitch(current, next), involvesRefPositionSwitch(current, next));
if (isNotSimpleTranslocation)
results.add(new ChimericAlignment(current, next, insertionMappings, alignedContig.contigName));
final ChimericAlignment chimericAlignment = new ChimericAlignment(current, next, insertionMappings, alignedContig.contigName);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

method to be phased out, but it makes sense to let down stream users to check what type of novel adjacency this CA could point to, which requests the state checkers to be more comprehensive

if (chimericAlignment.isNotSimpleTranslocation() &&
!BreakpointComplications.isLikelyInvertedDuplication(current, next))
results.add(chimericAlignment);

current = next;
}
Expand All @@ -169,8 +170,8 @@ public static List<ChimericAlignment> parseOneContig(final AlignedContig aligned

// TODO: 11/22/16 it might also be suitable to consider the reference context this alignment region is mapped to
// and not simply apply a hard filter (need to think about how to test)
static boolean mapQualTooLow(final AlignmentInterval next) {
return next.mapQual < CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD;
private static boolean mapQualTooLow(final AlignmentInterval next, final int mapQThresholdInclusive) {
return next.mapQual < mapQThresholdInclusive;
}

/**
Expand All @@ -186,16 +187,18 @@ static boolean firstAlignmentIsTooShort(final AlignmentInterval first, final Ali
* To implement the idea that for two consecutive alignment regions of a contig, the one with higher reference coordinate might be a novel insertion.
*/
@VisibleForTesting
public static boolean nextAlignmentMayBeNovelInsertion(final AlignmentInterval current, final AlignmentInterval next,
final Integer minAlignLength) {
return mapQualTooLow(next) || // inserted sequence might have low mapping quality
firstAlignmentIsTooShort(next, current, minAlignLength) || // inserted sequence might be very small
current.referenceSpan.contains(next.referenceSpan) || // one might completely contain the other
next.referenceSpan.contains(current.referenceSpan);
static boolean nextAlignmentMayBeInsertion(final AlignmentInterval current, final AlignmentInterval next,
final Integer minAlignLength, final int mapQThresholdInclusive,
final boolean filterWhollyContained) {
// not unique: inserted sequence may have low mapping quality (low reference uniqueness) or may be very small (low read uniqueness)
final boolean isNotUnique = mapQualTooLow(next, mapQThresholdInclusive) || firstAlignmentIsTooShort(next, current, minAlignLength);
return isNotUnique
||
(filterWhollyContained && (current.referenceSpan.contains(next.referenceSpan) || next.referenceSpan.contains(current.referenceSpan)));
}

@VisibleForTesting
public static StrandSwitch determineStrandSwitch(final AlignmentInterval first, final AlignmentInterval second) {
static StrandSwitch determineStrandSwitch(final AlignmentInterval first, final AlignmentInterval second) {
if (first.forwardStrand == second.forwardStrand) {
return StrandSwitch.NO_SWITCH;
} else {
Expand All @@ -204,66 +207,62 @@ public static StrandSwitch determineStrandSwitch(final AlignmentInterval first,
}

/**
* Determine if the region that maps to a lower coordinate on the contig also maps to a lower coordinate on the reference.
* An SV event could be detected from a contig that seem originate from the forward or reverse strand of the reference,
* besides the annotation that the alignment flanking regions might flank either side of the two breakpoints.
*/
@VisibleForTesting
public static boolean involvesRefPositionSwitch(final AlignmentInterval regionWithLowerCoordOnContig,
final AlignmentInterval regionWithHigherCoordOnContig) {
static boolean isForwardStrandRepresentation(final AlignmentInterval regionWithLowerCoordOnContig,
final AlignmentInterval regionWithHigherCoordOnContig,
final StrandSwitch strandSwitch) {

return regionWithHigherCoordOnContig.referenceSpan.getStart() < regionWithLowerCoordOnContig.referenceSpan.getStart();
switch (strandSwitch) {
case NO_SWITCH: return regionWithLowerCoordOnContig.forwardStrand;
case FORWARD_TO_REVERSE: return regionWithLowerCoordOnContig.referenceSpan.getEnd() < regionWithHigherCoordOnContig.referenceSpan.getEnd();
case REVERSE_TO_FORWARD: return regionWithLowerCoordOnContig.referenceSpan.getStart() < regionWithHigherCoordOnContig.referenceSpan.getStart();
default: throw new IllegalArgumentException("Seeing unexpected strand switch case: " + strandSwitch.name());
}
}

// TODO: 12/15/16 simple translocations are defined here and at this time as inter-chromosomal ones and
// intra-chromosomal translocations that involve strand switch
// to get the 2nd case right, we need evidence flanking both sides of the inversion, and that could be difficult
// TODO: 12/15/16 simple translocations are defined here and at this time as
// 1) inter-chromosomal ones and
// 2) intra-chromosomal translocations that DOES NOT involve strand switch
// To get the 2nd case right, we need evidence flanking both sides of the inversion, and that could be difficult
// TODO: 1/17/17 this is used for filtering out possible translocations that we are not currently handling,
// but it overkills some insertions where the inserted sequence maps to chromosomes other than that of the flanking regions
/**
* Determine if the two regions indicate a inter-chromosomal translocation or intra-chromosomal translocation that
* DOES NOT involve a strand switch.
*/
@VisibleForTesting
public static boolean isNotSimpleTranslocation(final AlignmentInterval regionWithLowerCoordOnContig,
final AlignmentInterval regionWithHigherCoordOnContig,
final StrandSwitch strandSwitch,
final boolean involvesReferenceIntervalSwitch) {
// logic is: must be the same reference chromosome for it not to be an inter-chromosomal translocation
// and when regions are mapped to the same reference chromosome, there cannot be reference position swap
final boolean sameChromosome = regionWithLowerCoordOnContig.referenceSpan.getContig()
.equals(regionWithHigherCoordOnContig.referenceSpan.getContig());
return sameChromosome
&&
(strandSwitch!=StrandSwitch.NO_SWITCH
|| involvesReferenceIntervalSwitch == !regionWithLowerCoordOnContig.forwardStrand);
}

/**
* An SV event could be detected from a contig that seem originate from the forward or reverse strand of the reference,
* besides the annotation that the alignment flanking regions might flank either side of the two breakpoints.
*/
@VisibleForTesting
static boolean isForwardStrandRepresentation(final AlignmentInterval regionWithLowerCoordOnContig,
final AlignmentInterval regionWithHigherCoordOnContig,
final StrandSwitch strandSwitch,
final boolean involvesReferenceIntervalSwitch) {

if (strandSwitch == StrandSwitch.NO_SWITCH) {
return regionWithLowerCoordOnContig.forwardStrand && regionWithHigherCoordOnContig.forwardStrand;
static boolean isLikelySimpleTranslocation(final AlignmentInterval regionWithLowerCoordOnContig,
final AlignmentInterval regionWithHigherCoordOnContig,
final StrandSwitch strandSwitch) {

if (!regionWithLowerCoordOnContig.referenceSpan.getContig()
.equals(regionWithHigherCoordOnContig.referenceSpan.getContig()))
return true;

if (strandSwitch.equals(StrandSwitch.NO_SWITCH)) {
if (regionWithLowerCoordOnContig.forwardStrand) {
return regionWithLowerCoordOnContig.referenceSpan.getStart() > regionWithHigherCoordOnContig.referenceSpan.getStart();
} else {
return regionWithLowerCoordOnContig.referenceSpan.getEnd() < regionWithHigherCoordOnContig.referenceSpan.getEnd();
}
} else {
return !involvesReferenceIntervalSwitch;
return false;
}
}

public boolean isNotSimpleTranslocation() {
return isNotSimpleTranslocation(regionWithLowerCoordOnContig, regionWithHigherCoordOnContig, strandSwitch,
involvesRefPositionSwitch(regionWithLowerCoordOnContig, regionWithHigherCoordOnContig));
return !isLikelySimpleTranslocation(regionWithLowerCoordOnContig, regionWithHigherCoordOnContig, strandSwitch);
}

Tuple2<SimpleInterval, SimpleInterval> getCoordSortedReferenceSpans() {
if (involvesRefPositionSwitch(regionWithLowerCoordOnContig, regionWithHigherCoordOnContig))
return new Tuple2<>(regionWithHigherCoordOnContig.referenceSpan, regionWithLowerCoordOnContig.referenceSpan);
else
if (isForwardStrandRepresentation) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

used in two places: BC.iniForInsDel() and NARL.leftJustifyBreakpoints().
The first use is trivial as it is type specific, whereas for the 2nd use in NARL, we need to be careful since we are considering expanding the NARL to cover cases of ref order switch.

return new Tuple2<>(regionWithLowerCoordOnContig.referenceSpan, regionWithHigherCoordOnContig.referenceSpan);
} else {
return new Tuple2<>(regionWithHigherCoordOnContig.referenceSpan, regionWithLowerCoordOnContig.referenceSpan);
}
}


Expand Down
Loading