Skip to content

Commit

Permalink
(SV) trim down ref bases for CPX variants (#4970)
Browse files Browse the repository at this point in the history
* previously, all bases from the affected region were extracted as the REF bases, leading to a bloated VCF; now only the anchor base is extracted
* also fixes the downstream CPX variant re-interpreter accordingly
  • Loading branch information
SHuang-Broad authored Jul 9, 2018
1 parent 111c8ef commit fa5244f
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -325,12 +325,13 @@ static VariantContext turnIntoVariantContext(final Tuple2<CpxVariantCanonicalRep
return rawVariantContextBuilder.make();
}

// TODO: 6/22/18 this leads to unnecessarily large file size, next PR trim it down to a single base like the case for DEL
/**
 * Fetches reference bases for a complex (CPX) variant from the given reference.
 *
 * <p>Looks up the affected reference region of the canonical representation and builds a
 * one-base interval located at that region's start position on the same contig.
 *
 * @param reference                         source the reference bases are read from
 * @param cpxVariantCanonicalRepresentation representation whose affected reference region is used
 * @return the bases returned by the reference lookup
 * @throws IOException declared by this method; presumably raised by the reference lookup — TODO confirm
 */
private static byte[] getRefBases(final ReferenceMultiSource reference, final CpxVariantCanonicalRepresentation cpxVariantCanonicalRepresentation)
throws IOException {
final SimpleInterval affectedRefRegion = cpxVariantCanonicalRepresentation.getAffectedRefRegion();
// One-base interval: start and end are both the start of the affected region.
SimpleInterval refBase = new SimpleInterval(affectedRefRegion.getContig(), affectedRefRegion.getStart(),
affectedRefRegion.getStart());
// NOTE(review): this listing appears to be a diff rendered without +/- markers, so the two
// getReferenceBases(...) lines below look like the pre-change (whole affected region) and
// post-change (single base) versions of one call rather than a real chain — confirm against the file.
return reference
.getReferenceBases(affectedRefRegion)
.getReferenceBases(refBase)
.getBases();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryInputMetaData;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignedContig;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.read.GATKRead;
Expand Down Expand Up @@ -679,7 +680,7 @@ List<VariantContext> extract(final VariantContext complexVC, final ReferenceMult
}

// head and tail insertions only
extractFrontAndRearInsertions(complexVC, refSegments, altArrangement, result);
extractFrontAndRearInsertions(complexVC, refSegments, altArrangement, reference, result);
}

final String sourceID = complexVC.getID();
Expand Down Expand Up @@ -737,8 +738,8 @@ private static void whenAllSegmentsAppearAsIs(final VariantContext complexVC, fi
}
if ( idx + refSegments.size() - 1 < altArrangement.size() - 1 ) { // e.g. there's more after 1,2,3,4,..., there could be (that is, if long enough) front insertion
final SimpleInterval insertionPos = new SimpleInterval(complexVC.getContig(), complexVC.getEnd(), complexVC.getEnd());
final byte[] refBases = complexVC.getReference().getBases();
final Allele anchorBaseRefAlleleRear = Allele.create(refBases[refBases.length - 1], true);
final byte[] refBases = getReferenceBases(insertionPos, reference);
final Allele anchorBaseRefAlleleRear = Allele.create(refBases, true);
final VariantContextBuilder rearIns = getInsFromOneEnd(false, idx + refSegments.size() - 1, insertionPos,
anchorBaseRefAlleleRear, refSegmentLengths, altArrangement, true);
if (rearIns != null) result.add(rearIns);
Expand Down Expand Up @@ -805,9 +806,8 @@ static List<SimpleInterval> compactifyMissingSegments(final Set<SimpleInterval>
}

private void extractFrontAndRearInsertions(final VariantContext complexVC, final List<SimpleInterval> refSegmentIntervals,
final List<String> altArrangement,
final List<String> altArrangement, final ReferenceMultiSource reference,
final List<VariantContextBuilder> result) {
final byte[] refBases = complexVC.getReference().getBases();
final List<Integer> refSegmentLengths = refSegmentIntervals.stream().map(SimpleInterval::size).collect(Collectors.toList());
// index pointing to first appearance of ref segment (inverted or not) in altArrangement, from either side
int firstRefSegmentIdx = 0; // first front
Expand All @@ -819,8 +819,8 @@ private void extractFrontAndRearInsertions(final VariantContext complexVC, final
}
}
if (firstRefSegmentIdx > 0) {
final Allele anchorBaseRefAlleleFront = Allele.create(refBases[0], true);
final SimpleInterval startAndStop = makeOneBpInterval(complexVC.getContig(), complexVC.getStart());
final Allele anchorBaseRefAlleleFront = Allele.create(getReferenceBases(startAndStop, reference), true);
final VariantContextBuilder frontIns = getInsFromOneEnd(true, firstRefSegmentIdx, startAndStop, anchorBaseRefAlleleFront, refSegmentLengths, altArrangement, true);
if (frontIns != null) result.add( frontIns );
}
Expand All @@ -835,9 +835,10 @@ private void extractFrontAndRearInsertions(final VariantContext complexVC, final
}

if (firstRefSegmentIdx != altArrangement.size() - 1) {
final Allele anchorBaseRefAlleleRear = Allele.create(refBases[refBases.length - 2], true);
final SimpleInterval startAndStop = makeOneBpInterval(complexVC.getContig(), complexVC.getEnd());
final VariantContextBuilder rearIns = getInsFromOneEnd(false, firstRefSegmentIdx, startAndStop, anchorBaseRefAlleleRear, refSegmentLengths, altArrangement, true);
final int pos = complexVC.getEnd();
final SimpleInterval insertionPos = makeOneBpInterval(complexVC.getContig(), pos);
final Allele anchorBaseRefAlleleRear = Allele.create(getReferenceBases(insertionPos, reference), true);
final VariantContextBuilder rearIns = getInsFromOneEnd(false, firstRefSegmentIdx, insertionPos, anchorBaseRefAlleleRear, refSegmentLengths, altArrangement, true);
if (rearIns != null) result.add( rearIns );
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ public void testTurnIntoVariantContext() throws IOException {
final CpxVariantCanonicalRepresentation cpxVariantCanonicalRepresentation = new CpxVariantCanonicalRepresentation(tig13846_3);
final Tuple2<CpxVariantCanonicalRepresentation, Iterable<CpxVariantInducingAssemblyContig>> tuple2 =
new Tuple2<>(cpxVariantCanonicalRepresentation, Arrays.asList(tig13846_3, tig28220_5));
final byte[] refBases = TestUtilsForAssemblyBasedSVDiscovery.b38_reference_chr20_chr21.getReferenceBases(new SimpleInterval("chr20", 54849491, 54849615)).getBases();
final byte[] refBases = TestUtilsForAssemblyBasedSVDiscovery.b38_reference_chr20_chr21.getReferenceBases(new SimpleInterval("chr20", 54849491, 54849491)).getBases();

final VariantContextBuilder baseVariantContextBuilder = cpxVariantCanonicalRepresentation.toVariantContext(refBases);
baseVariantContextBuilder.attribute(GATKSVVCFConstants.TOTAL_MAPPINGS, 2);
Expand Down
Loading

0 comments on commit fa5244f

Please sign in to comment.