From 5b8e571ecc90c8e8bc604f9dccdb0985bca2c39f Mon Sep 17 00:00:00 2001 From: Steve Huang Date: Tue, 17 Apr 2018 02:41:22 -0400 Subject: [PATCH] (SV) Representation change commit 3: change how DUP variants with short duplicated ref region are represented in NEW CODE PATH ONLY --- .../NovelAdjacencyAndAltHaplotype.java | 24 +- .../AnnotatedVariantProducerUnitTest.java | 8 +- ...sFromContigAlignmentsSAMSparkUnitTest.java | 4 +- .../SimpleSVDiscoveryTestDataProvider.java | 256 +++++++++++++----- .../sv/discovery/SimpleSVTypeUnitTest.java | 33 ++- .../inference/ChimericAlignmentUnitTest.java | 8 +- ...NovelAdjacencyAndAltHaplotypeUnitTest.java | 23 +- 7 files changed, 263 insertions(+), 93 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotype.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotype.java index 3560727f7ec..06019f42ead 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotype.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotype.java @@ -232,10 +232,16 @@ public List toSimpleOrBNDTypes(final ReferenceMultiSource reference, fin final int svLength = this.getComplication().getInsertedSequenceForwardStrandRep().length(); return Collections.singletonList( new SimpleSVType.Insertion(this, svLength) ); } - case SMALL_DUP_EXPANSION: // TODO: 4/16/18 make adjustments based on duplicated size + case SMALL_DUP_EXPANSION: { final int svLength = getLengthForDupTandem(this); - return Collections.singletonList( new SimpleSVType.DuplicationTandem(this, svLength) ); + final BreakpointComplications.SmallDuplicationWithPreciseDupRangeBreakpointComplications duplicationComplication = + (BreakpointComplications.SmallDuplicationWithPreciseDupRangeBreakpointComplications) this.getComplication(); + if 
(duplicationComplication.getDupSeqRepeatUnitRefSpan().size() < 50) { + return Collections.singletonList( new SimpleSVType.Insertion(this, svLength)); + } else { + return Collections.singletonList( new SimpleSVType.DuplicationTandem(this, svLength) ); + } } case DEL_DUP_CONTRACTION: { @@ -244,13 +250,19 @@ public List toSimpleOrBNDTypes(final ReferenceMultiSource reference, fin } case SMALL_DUP_CPX: { - if ( ((BreakpointComplications.SmallDuplicationWithImpreciseDupRangeBreakpointComplications) - this.getComplication()).isDupContraction() ) { + final BreakpointComplications.SmallDuplicationWithImpreciseDupRangeBreakpointComplications duplicationComplication = + (BreakpointComplications.SmallDuplicationWithImpreciseDupRangeBreakpointComplications) + this.getComplication(); + if ( duplicationComplication.isDupContraction() ) { final int svLength = leftJustifiedLeftRefLoc.getEnd() - leftJustifiedRightRefLoc.getStart(); return Collections.singletonList( new SimpleSVType.Deletion(this, svLength) ); - } else { // TODO: 4/16/18 make adjustments based on duplicated size + } else { final int svLength = getLengthForDupTandem(this); - return Collections.singletonList( new SimpleSVType.DuplicationTandem(this, svLength) ); + if (duplicationComplication.getDupSeqRepeatUnitRefSpan().size() < 50) { + return Collections.singletonList( new SimpleSVType.Insertion(this, svLength)); + } else { + return Collections.singletonList( new SimpleSVType.DuplicationTandem(this, svLength) ); + } } } default: diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java index 6256a4f89ac..4c9f31f3ce1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/AnnotatedVariantProducerUnitTest.java @@ 
-109,10 +109,10 @@ private Object[][] dataForEvidenceAnnotation() { data.add(new Object[]{forSimpleTanDupContraction_minus, new String[]{"60"}, new String[]{String.valueOf(40)}}); // simple tandem dup expansion from 1 unit to 2 units - data.add(new Object[]{forSimpleTanDupExpansion_plus, new String[]{"60"}, new String[]{String.valueOf(50)}}); + data.add(new Object[]{forSimpleTanDupExpansion_ins_plus, new String[]{"60"}, new String[]{String.valueOf(50)}}); // simple tandem dup expansion from 1 unit to 2 units and novel insertion - data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_minus, new String[]{"60"}, new String[]{String.valueOf(137)}}); + data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_dup_minus, new String[]{"60"}, new String[]{String.valueOf(137)}}); // tandem dup expansion from 1 unit to 2 units with pseudo-homology data.add(new Object[]{forComplexTanDup_1to2_pseudoHom_plus, new String[]{"60"}, new String[]{String.valueOf(127)}}); @@ -206,12 +206,12 @@ private Object[][] dataForIntegrativeTest() { broadcastCNVCalls, referenceBroadcast, refSeqDictBroadcast}); // simple tandem dup expansion from 1 unit to 2 units - data.add(new Object[]{forSimpleTanDupExpansion_plus, + data.add(new Object[]{forSimpleTanDupExpansion_ins_plus, Stream.concat( commonAttributes.stream(), Sets.newHashSet(DUP_TAN_EXPANSION_STRING, DUP_REPEAT_UNIT_REF_SPAN, DUP_SEQ_CIGARS, DUPLICATION_NUMBERS, DUP_ORIENTATIONS).stream()).sorted().collect(Collectors.toList()), broadcastCNVCalls, referenceBroadcast, refSeqDictBroadcast}); // simple tandem dup expansion from 1 unit to 2 units and novel insertion - data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_minus, + data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_dup_minus, Stream.concat( commonAttributes.stream(), Sets.newHashSet(DUP_TAN_EXPANSION_STRING, DUP_REPEAT_UNIT_REF_SPAN, DUP_SEQ_CIGARS, DUPLICATION_NUMBERS, DUP_ORIENTATIONS, INSERTED_SEQUENCE, 
INSERTED_SEQUENCE_LENGTH).stream()).sorted().collect(Collectors.toList()), broadcastCNVCalls, referenceBroadcast, refSeqDictBroadcast}); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSparkUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSparkUnitTest.java index 6d33a4dfcd3..21c67c6411c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSparkUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/DiscoverVariantsFromContigAlignmentsSAMSparkUnitTest.java @@ -99,10 +99,10 @@ private Object[][] forTypeInference() { data.add(new Object[]{forSimpleTanDupContraction_plus.biPathBubble, DEL.name(), ImmutableSet.of(DUP_TAN_CONTRACTION_STRING)}); // simple tandem dup expansion from 1 unit to 2 units - data.add(new Object[]{forSimpleTanDupExpansion_minus.biPathBubble, DUP.name(), ImmutableSet.of(DUP_TAN_EXPANSION_STRING)}); + data.add(new Object[]{forSimpleTanDupExpansion_ins_minus.biPathBubble, DUP.name(), ImmutableSet.of(DUP_TAN_EXPANSION_STRING)}); // simple tandem dup expansion from 1 unit to 2 units and novel insertion - data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_plus.biPathBubble, DUP.name(), ImmutableSet.of(DUP_TAN_EXPANSION_STRING)}); + data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_dup_plus.biPathBubble, DUP.name(), ImmutableSet.of(DUP_TAN_EXPANSION_STRING)}); // tandem dup expansion from 1 unit to 2 units with pseudo-homology data.add(new Object[]{forComplexTanDup_1to2_pseudoHom_minus.biPathBubble, DUP.name(), ImmutableSet.of(DUP_TAN_EXPANSION_STRING)}); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java index 
fb74ebaecc5..33b7222023c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java @@ -120,10 +120,14 @@ public static final class TestDataForSimpleSVs { public static final TestDataForSimpleSVs forDeletionWithHomology_minus; public static final TestDataForSimpleSVs forSimpleTanDupContraction_plus; public static final TestDataForSimpleSVs forSimpleTanDupContraction_minus; - public static final TestDataForSimpleSVs forSimpleTanDupExpansion_plus; - public static final TestDataForSimpleSVs forSimpleTanDupExpansion_minus; - public static final TestDataForSimpleSVs forSimpleTanDupExpansionWithNovelIns_plus; - public static final TestDataForSimpleSVs forSimpleTanDupExpansionWithNovelIns_minus; + public static final TestDataForSimpleSVs forSimpleTanDupExpansion_ins_plus; + public static final TestDataForSimpleSVs forSimpleTanDupExpansion_ins_minus; + public static final TestDataForSimpleSVs forSimpleTanDupExpansion_dup_plus; + public static final TestDataForSimpleSVs forSimpleTanDupExpansion_dup_minus; + public static final TestDataForSimpleSVs forSimpleTanDupExpansionWithNovelIns_ins_plus; + public static final TestDataForSimpleSVs forSimpleTanDupExpansionWithNovelIns_ins_minus; + public static final TestDataForSimpleSVs forSimpleTanDupExpansionWithNovelIns_dup_plus; + public static final TestDataForSimpleSVs forSimpleTanDupExpansionWithNovelIns_dup_minus; public static final TestDataForSimpleSVs forComplexTanDup_1to2_pseudoHom_plus; public static final TestDataForSimpleSVs forComplexTanDup_1to2_pseudoHom_minus; public static final TestDataForSimpleSVs forComplexTanDup_2to1_pseudoHom_plus; @@ -132,6 +136,10 @@ public static final class TestDataForSimpleSVs { public static final TestDataForSimpleSVs forComplexTanDup_3to2_noPseudoHom_minus; public static final TestDataForSimpleSVs 
forComplexTanDup_2to3_noPseudoHom_plus; public static final TestDataForSimpleSVs forComplexTanDup_2to3_noPseudoHom_minus; + public static final TestDataForSimpleSVs forComplexTanDup_1to2_short_pseudoHom_plus; + public static final TestDataForSimpleSVs forComplexTanDup_1to2_short_pseudoHom_minus; + public static final TestDataForSimpleSVs forComplexTanDup_2to3_short_noPseudoHom_plus; + public static final TestDataForSimpleSVs forComplexTanDup_2to3_short_noPseudoHom_minus; static { try ( final ByteArrayOutputStream outputStream = new ByteArrayOutputStream() ){ @@ -169,12 +177,16 @@ public static final class TestDataForSimpleSVs { forSimpleTanDupContraction_minus = simpleTandemDuplicationContraction.get(1); final List simpleTandemDuplicationExpansion = forSimpleTandemDuplicationExpansion(outputStream); - forSimpleTanDupExpansion_plus = simpleTandemDuplicationExpansion.get(0); - forSimpleTanDupExpansion_minus = simpleTandemDuplicationExpansion.get(1); + forSimpleTanDupExpansion_ins_plus = simpleTandemDuplicationExpansion.get(0); + forSimpleTanDupExpansion_ins_minus = simpleTandemDuplicationExpansion.get(1); + forSimpleTanDupExpansion_dup_plus = simpleTandemDuplicationExpansion.get(2); + forSimpleTanDupExpansion_dup_minus = simpleTandemDuplicationExpansion.get(3); final List simpleTandemDuplicationExpansionWithNovelInsertion = forSimpleTandemDuplicationExpansionWithNovelInsertion(outputStream); - forSimpleTanDupExpansionWithNovelIns_plus = simpleTandemDuplicationExpansionWithNovelInsertion.get(0); - forSimpleTanDupExpansionWithNovelIns_minus = simpleTandemDuplicationExpansionWithNovelInsertion.get(1); + forSimpleTanDupExpansionWithNovelIns_ins_plus = simpleTandemDuplicationExpansionWithNovelInsertion.get(0); + forSimpleTanDupExpansionWithNovelIns_ins_minus = simpleTandemDuplicationExpansionWithNovelInsertion.get(1); + forSimpleTanDupExpansionWithNovelIns_dup_plus = simpleTandemDuplicationExpansionWithNovelInsertion.get(2); + 
forSimpleTanDupExpansionWithNovelIns_dup_minus = simpleTandemDuplicationExpansionWithNovelInsertion.get(3); final List complexTandemDuplication = forComplexTandemDuplication(); forComplexTanDup_1to2_pseudoHom_plus = complexTandemDuplication.get(0); @@ -186,6 +198,12 @@ public static final class TestDataForSimpleSVs { forComplexTanDup_2to3_noPseudoHom_plus = complexTandemDuplication.get(6); forComplexTanDup_2to3_noPseudoHom_minus = complexTandemDuplication.get(7); + final List shortComplexTandemDuplication = forComplexTandemDuplicationIns(); + forComplexTanDup_1to2_short_pseudoHom_plus = shortComplexTandemDuplication.get(0); + forComplexTanDup_1to2_short_pseudoHom_minus = shortComplexTandemDuplication.get(1); + forComplexTanDup_2to3_short_noPseudoHom_plus = shortComplexTandemDuplication.get(2); + forComplexTanDup_2to3_short_noPseudoHom_minus = shortComplexTandemDuplication.get(3); + testDataInitialized = true; } catch (final Exception ioex) { throw new GATKException("Failed to create test data ", ioex); @@ -209,10 +227,10 @@ public static List getAllTestData() { forDeletionWithHomology_minus, forSimpleTanDupContraction_plus, forSimpleTanDupContraction_minus, - forSimpleTanDupExpansion_plus, - forSimpleTanDupExpansion_minus, - forSimpleTanDupExpansionWithNovelIns_plus, - forSimpleTanDupExpansionWithNovelIns_minus, + forSimpleTanDupExpansion_ins_plus, + forSimpleTanDupExpansion_ins_minus, + forSimpleTanDupExpansionWithNovelIns_dup_plus, + forSimpleTanDupExpansionWithNovelIns_dup_minus, forComplexTanDup_1to2_pseudoHom_plus, forComplexTanDup_1to2_pseudoHom_minus, forComplexTanDup_2to1_pseudoHom_plus, @@ -235,8 +253,8 @@ public static List> getAllTes new Tuple2<>(forLongRangeSubstitution_fudgedDel_plus, forLongRangeSubstitution_fudgedDel_minus), new Tuple2<>(forDeletionWithHomology_plus, forDeletionWithHomology_minus), new Tuple2<>(forSimpleTanDupContraction_plus, forSimpleTanDupContraction_minus), - new Tuple2<>(forSimpleTanDupExpansion_plus, 
forSimpleTanDupExpansion_minus), - new Tuple2<>(forSimpleTanDupExpansionWithNovelIns_plus, forSimpleTanDupExpansionWithNovelIns_minus), + new Tuple2<>(forSimpleTanDupExpansion_ins_plus, forSimpleTanDupExpansion_ins_minus), + new Tuple2<>(forSimpleTanDupExpansionWithNovelIns_dup_plus, forSimpleTanDupExpansionWithNovelIns_dup_minus), new Tuple2<>(forComplexTanDup_1to2_pseudoHom_plus, forComplexTanDup_1to2_pseudoHom_minus), new Tuple2<>(forComplexTanDup_2to1_pseudoHom_plus, forComplexTanDup_2to1_pseudoHom_minus), new Tuple2<>(forComplexTanDup_3to2_noPseudoHom_plus, forComplexTanDup_3to2_noPseudoHom_minus), @@ -583,44 +601,81 @@ public static List> getAllTes } /** + * case that will be called as insertion * 40-'A' + 10-'C' + 40-'G' is expanded to 40-'A' + 20-'C' + 40-'G' (forward strand representation) - * Return a list of two entries for positive and reverse strand representations. + * + * case that will be called as duplication + * 40-'A' + 55-'C' + 40-'G' is expanded to 40-'A' + 110-'C' + 40-'G' (forward strand representation) */ private static List forSimpleTandemDuplicationExpansion(final ByteArrayOutputStream outputStream) throws IOException { final List result = new ArrayList<>(); - // simple tandem duplication expansion '+' strand representation - final byte[] leftRefFlank = SVTestUtils.makeDummySequence(40, (byte)'A'); - final byte[] rightRefFlank = SVTestUtils.makeDummySequence(40, (byte)'G'); - final byte[] doubleDup = SVTestUtils.makeDummySequence(20, (byte)'C'); - outputStream.reset(); - outputStream.write(leftRefFlank);outputStream.write(doubleDup);outputStream.write(rightRefFlank); - byte[] contigSeq = outputStream.toByteArray(); + {// insertion case + // '+' strand representation + final byte[] leftRefFlank = SVTestUtils.makeDummySequence(40, (byte)'A'); + final byte[] rightRefFlank = SVTestUtils.makeDummySequence(40, (byte)'G'); + final byte[] doubleDup = SVTestUtils.makeDummySequence(20, (byte)'C'); + outputStream.reset(); + 
outputStream.write(leftRefFlank);outputStream.write(doubleDup);outputStream.write(rightRefFlank); + byte[] contigSeq = outputStream.toByteArray(); - AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 1 ,50, TextCigarCodec.decode("50M50S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100041, 100090), 51 ,100, TextCigarCodec.decode("50S50M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); - result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); + AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 1 ,50, TextCigarCodec.decode("50M50S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100041, 100090), 51 ,100, TextCigarCodec.decode("50S50M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); - // simple tandem duplication expansion '-' strand representation - SequenceUtil.reverseComplement(leftRefFlank); - SequenceUtil.reverseComplement(rightRefFlank); - SequenceUtil.reverseComplement(doubleDup); - outputStream.reset(); - outputStream.write(rightRefFlank);outputStream.write(doubleDup);outputStream.write(leftRefFlank); - contigSeq = outputStream.toByteArray(); - region1 = new AlignmentInterval(new SimpleInterval("21", 100041, 100090), 1 ,50, 
TextCigarCodec.decode("50M50S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 51 ,100, TextCigarCodec.decode("50S50M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); - result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); + // '-' strand representation + SequenceUtil.reverseComplement(leftRefFlank); + SequenceUtil.reverseComplement(rightRefFlank); + SequenceUtil.reverseComplement(doubleDup); + outputStream.reset(); + outputStream.write(rightRefFlank);outputStream.write(doubleDup);outputStream.write(leftRefFlank); + contigSeq = outputStream.toByteArray(); + region1 = new AlignmentInterval(new SimpleInterval("21", 100041, 100090), 1 ,50, TextCigarCodec.decode("50M50S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 51 ,100, TextCigarCodec.decode("50S50M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); + } + + {// duplication case + // '+' strand representation + final byte[] leftRefFlank = SVTestUtils.makeDummySequence(40, (byte)'A'); + final byte[] rightRefFlank = SVTestUtils.makeDummySequence(40, (byte)'G'); + final byte[] doubleDup = SVTestUtils.makeDummySequence(110, (byte)'C'); + outputStream.reset(); + 
outputStream.write(leftRefFlank);outputStream.write(doubleDup);outputStream.write(rightRefFlank); + byte[] contigSeq = outputStream.toByteArray(); + + AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100095), 1 ,95, TextCigarCodec.decode("95M95S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100041, 100135), 96 ,190, TextCigarCodec.decode("95S95M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); + + // '-' strand representation + SequenceUtil.reverseComplement(leftRefFlank); + SequenceUtil.reverseComplement(rightRefFlank); + SequenceUtil.reverseComplement(doubleDup); + outputStream.reset(); + outputStream.write(rightRefFlank);outputStream.write(doubleDup);outputStream.write(leftRefFlank); + contigSeq = outputStream.toByteArray(); + region1 = new AlignmentInterval(new SimpleInterval("21", 100041, 100135), 1 ,95, TextCigarCodec.decode("95M95S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100095), 96 ,190, TextCigarCodec.decode("95S95M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); + } return result; } /** - * System.out.println(new 
String(reference.getReferenceBases(dummyOptions, new SimpleInterval("21", 25297100, 25297300)).getBases())); + * Real event, which will be output as INS (the event actually came from an hg38 sample, but that does not matter for this test) + * repeat: chr21:26849022-26849037 + * repeat sequence: CCGGGAAATGCTTTTT + * insertedSequenceForwardStrandRep: TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGC + * + * Real event, which will be output as DUP * leftFlank: chr21:25297101-25297163 * repeat: chr21:25297164-25297252 * rightFlank: chr21:25297253-25297300 @@ -629,40 +684,53 @@ public static List> getAllTes * CATTATTGATATTTCATTATGTTCAACAGATGGAGTTAATGTGAATGT * * insertedSequenceForwardStrandRep: CTCTCTCTCT - * - * Return a list of two entries for positive and reverse strand representations. 
*/ private static List forSimpleTandemDuplicationExpansionWithNovelInsertion(final ByteArrayOutputStream outputStream) throws IOException { final List result = new ArrayList<>(); - // simple tandem duplication expansion with novel insertion '+' strand representation - final byte[] leftRefFlank = "GTTAGTAGATATTCTAGCTGACTCAGTTCAGTGTTGCTATGATTAAACAAGAGTGAGTTCCCT".getBytes(); //63 - final byte[] rightRefFlank = "CATTATTGATATTTCATTATGTTCAACAGATGGAGTTAATGTGAATGT".getBytes(); //48 - final byte[] insertedSeq = "CTCTCTCTCT".getBytes(); //10 - final byte[] dup = "AAAAGTAAATGTTATAAGAAATCTTAAGTATTATTTTCTTATGTTTCTAGCCTAATAAAGTGCTTTTATTAAAGCACTTTATTTAAAGG".getBytes(); //89 - outputStream.reset(); - outputStream.write(leftRefFlank);outputStream.write(dup);outputStream.write(insertedSeq);outputStream.write(dup);outputStream.write(rightRefFlank); - byte[] contigSeq = outputStream.toByteArray(); - AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 25297101, 25297252), 1 ,152, TextCigarCodec.decode("152M147S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 25297164, 25297300), 163 ,299, TextCigarCodec.decode("162S137M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); - result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); + { + AlignmentInterval region1 = 
SVTestUtils.fromSAMRecordString("asm029081:tig00000\t0\t21\t26847644\t60\t1394M1675S\t*\t0\t0\tTATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAA
ATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26849022,+,1704S657M2I706M,60,2;chr10,97348533,+,1388S317M1364S,0,0;\tMD:Z:1204A189\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:1389\tXS:i:0", true); + AlignmentInterval region2 = 
SVTestUtils.fromSAMRecordString("asm029081:tig00000\t2048\t21\t26849022\t60\t1704H657M2I706M\t*\t0\t0\tCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26847644,+,1394M1675S,60,1;chr10,97348533,+,1388S317M1364S,0,0;\tMD:Z:1363\tRG:Z:GATKSVContigAlignments\tNM:i:2\tAS:i:1345\tXS:i:0", true); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), 
"TATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGA
TCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT".getBytes(), b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); - // simple tandem duplication expansion with novel insertion '-' strand representation - SequenceUtil.reverseComplement(leftRefFlank); - SequenceUtil.reverseComplement(rightRefFlank); - SequenceUtil.reverseComplement(insertedSeq); - SequenceUtil.reverseComplement(dup); - outputStream.reset(); - outputStream.write(rightRefFlank);outputStream.write(dup);outputStream.write(insertedSeq);outputStream.write(dup);outputStream.write(leftRefFlank); - contigSeq = outputStream.toByteArray(); + region1 = 
SVTestUtils.fromSAMRecordString("asm000001:tig00001\t2064\t21\t26849022\t60\t1704H657M3I706M\t*\t0\t0\tCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26847644,-,1394M1676S,60,1;chr10,97348533,-,1388S317M1365S,0,0;\tMD:Z:1363\tRG:Z:GATKSVContigAlignments\tNM:i:3\tAS:i:1344\tXS:i:0", true); + region2 = 
SVTestUtils.fromSAMRecordString("asm000001:tig00001\t16\t21\t26847644\t60\t1394M1676S\t*\t0\t0\tTATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCA
AATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGATCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT\t*\tSA:Z:21,26849022,-,1704S657M3I706M,60,3;chr10,97348533,-,1388S317M1365S,0,0;\tMD:Z:1204A189\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:1384\tXS:i:0", true); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), 
"TATGGTGACAACAGTTACAGCCTCAGAGTGGCTTTGAGATGGAGATTTGAGATGGAGATTTTGGACACTCATAATCTCCATCTCAAGGAGAGGTGGCCCAGAGGCTGAATGAAGATGAGTGAAGGTAGATGTGATTCCCTTAAATTGGGGCAAAAAGGGACAAAAACAGCAGAAAATCTGTATCTTTAAAGACATATGTAATGTATTTCAGTCTATCAACTTCTCTACATAAACTTTAGCTTTTAAAAATATGTTAACGTAAGTTTGACCTTTAGTGTGTTTCTACCTGCAGGGTATTCTTATTGGAGGTTTGTTTAAAAGCATACATTTCTGATCTTGAATGGGTTACTACAAATCCATTATAATTGTTTCATATTTCATGTTGCAGATACAAGTAGGGTTGAAAAAACAGTGAGTTAAAGGCAAAAGGATGGCCGGGAACATGGCTTTTTTATTCTCTGGGTTTCTATCCAGATTTCTGTTCTTTTGCATAATGACTCCAATCTGTTGTGCACCTGTAGTTCTGGGAAATGATTCTTTTTTAATCGCTTCAACAGAGACATGGATGTTGGAGTTGCCAACTACTAAGCTGAAAAACTCCATCTATGCTCAGAAGAACATTTAATCCACTTACTTTTTCTCTTTTATTTAAAGATTAGCACTCATCAGGCATTTGTGGTAATATGCAAATATATACATAGGACATATATGTATATTTATAAGCAAAATGTGAATTGGAAAAACATTTGAATGTAGAAACAAGACCACAGGAGTAAATTTGTACAAGGCACTAGTAAAAGTGACATGTAATATGGGGTTCTTGTAGTGAGTTTCATAATCCAATTTTTGCTCCTTGATTTGAATGGGCACCCAAAATAACACATGCTATCCTAATCCCTACTCCCCATATTTTGGGTTTTATTTTTATAGAATACATATGGGCTTATATAAACATTAATCTCAACATGTTCTAATTTACATATGTAAGCTAATTTTTATTTCTAGAGATAACAGAACAAAACTCAAAACATTTGACATAAAATTATTGGAACAATTAACAGTTTGACCTATTAAACACATTATTGTCCTCTATGAACAGAGGGACTGTCTGAAAAAAAGAACAAGTTGTCTGCATTTTAAAGTGAGAGATAAGCATCAAGGTGTCAATTTCTATTTACACCTTATGTGTTCTTATTTGTTTCACTGATTCATATGTTATAGACACAATATTCTATTCACAATTTTCACGACGTCTATACCAAAGTAAGTATTCAACAAGTAGCCATGAAATGAGGAAATCTGGTAATATACATGAGCTATTAGAATTGTTTTAATGTAAACATTGTCTAGAGAAACAACTAATGTGCATATTTCATAACCGGGAAATGCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGCAGGCTCCGCCCCCTGGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCTCGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCCGGGAAATGCTTTTTATTCACATTTTAGTACCATAAGATTGACTAATTAGAAATAGGGAAGCTGTTTGGTATTAGTCCTACTTTTGGGAACATATGGTCTAAAGTAATATTGGACAAATGGATATTTTAATTGATCACAAATGAGAAAGTAGTTAGAAAACTTCTAGTTTAAACAGGTTATATACCCAGAAGTATTGCAAATATTGGAGACAGAAAAATTATTGTAGCTTGCATTTAGACTCAAAATTGATATTCCCTCCACATCATGCCCTCACAGAACTCAGAGGAGTATGA
TCTGCCAGATCGACTCCCTTTGTGTAACTGATGCAGAAATGAGACTCAAAAGGGTTAAGTAGTTTTCCCAAGATGTCATAGCGAGAGTCAAACTGGGGGCTTAGAAATGACTCTGACTCATAACTTTTAATCAACTGTTTTGACATTTTAACCTATCTAATTGTGTAGGAGGTAATTATATTGTCAGACTTTGGAATGATGTTGTTTCCAGTAAAGTTTTGTTTTAATTATAAATAGGAATTTTCCAGCAATAAAAAATTTCCACCTTAAAAGATTCTCAGACTTTAGTACATCTTTCTCCAAACACAAGGTGGCGATGGTCTACAACAAATGATGTGCGACTTGGTGTTTTTTTTTTTTTTTTGTTCTTTCCTTTCCTTTTTTATTCTTAATAGTTCAAGTTAAGAATTTGCAAAAGTTTCACATCTTCTCAATCATGTTTAATAAATTCTAATTAAATATTCTCCTACCTCCTAGTATTATGGAAAATATTTTAAAAATATTACAATGTTAAATGAATTTATTCTTGAGGGCATAATAAAATGCGTTTTTAAATCAACTACTTTTTAATTATGTGTTTGTATTACCATAAACAAAAATCCAATTAAACTTTAAAGAAAGAAAACTGCCTCTGACAAAATAATACTGTGGACCGCTTTTATTCATTACATTTGAGAACTTCTTGTCATTCAAATGAAAAGATTAAGTACATTTGCAATCCACTAAAACAGATTAAAAACTCATTCATTTATTCAATAGATATTAAGTACATACAGTATGTTTAGTATACATTAATACTTGACAATCAATACTGGTTAACTGGTTTCCCTGGTTTAGAAATTTTCCTTAGCAACAACGTAAGGCTTAAAATGAAAAAAGAAAAGTGAGAAAATGTTCTACCACCAGGTGGTGACAAAAGATAAAATTTAAAATCGCTCTTAATGAGCACATACTTCATGTAATTCTTGAATACTGCAAATATAAGTGACTTCCGAATGTCATGTGAATTTAAAATCATATTCTAGGAATATTTTATTAATTAAAGCAAATTAATATTAACATATTATCTCT".getBytes(), b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); + } - region1 = new AlignmentInterval(new SimpleInterval("21", 25297164, 25297300), 1 ,137, TextCigarCodec.decode("137M162S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - region2 = new AlignmentInterval(new SimpleInterval("21", 25297101, 25297252), 148 ,299, TextCigarCodec.decode("147S152M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); - result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); + { + // simple tandem duplication expansion with 
novel insertion '+' strand representation + final byte[] leftRefFlank = "GTTAGTAGATATTCTAGCTGACTCAGTTCAGTGTTGCTATGATTAAACAAGAGTGAGTTCCCT".getBytes(); //63 + final byte[] rightRefFlank = "CATTATTGATATTTCATTATGTTCAACAGATGGAGTTAATGTGAATGT".getBytes(); //48 + final byte[] insertedSeq = "CTCTCTCTCT".getBytes(); //10 + final byte[] dup = "AAAAGTAAATGTTATAAGAAATCTTAAGTATTATTTTCTTATGTTTCTAGCCTAATAAAGTGCTTTTATTAAAGCACTTTATTTAAAGG".getBytes(); //89 + outputStream.reset(); + outputStream.write(leftRefFlank);outputStream.write(dup);outputStream.write(insertedSeq);outputStream.write(dup);outputStream.write(rightRefFlank); + byte[] contigSeq = outputStream.toByteArray(); + + AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 25297101, 25297252), 1 ,152, TextCigarCodec.decode("152M147S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 25297164, 25297300), 163 ,299, TextCigarCodec.decode("162S137M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); + + // simple tandem duplication expansion with novel insertion '-' strand representation + SequenceUtil.reverseComplement(leftRefFlank); + SequenceUtil.reverseComplement(rightRefFlank); + SequenceUtil.reverseComplement(insertedSeq); + SequenceUtil.reverseComplement(dup); + outputStream.reset(); + outputStream.write(rightRefFlank);outputStream.write(dup);outputStream.write(insertedSeq);outputStream.write(dup);outputStream.write(leftRefFlank); + contigSeq = outputStream.toByteArray(); + + region1 = new AlignmentInterval(new SimpleInterval("21", 25297164, 25297300), 1 ,137, TextCigarCodec.decode("137M162S"), false, 
60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + region2 = new AlignmentInterval(new SimpleInterval("21", 25297101, 25297252), 148 ,299, TextCigarCodec.decode("147S152M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); + } return result; } @@ -778,4 +846,64 @@ public static List> getAllTes return result; } + + /** + * See {@link #forComplexTandemDuplication()}. + * Here we are simply making the repeat unit short (42 bp, below the 50 bp cutoff) so that the expansion is expected to be typed as an insertion rather than a tandem duplication. + */ + private static List + forComplexTandemDuplicationIns() { + + final List result = new ArrayList<>(); + final String leftRefFlank = "TGCCAGGTTACATGGCAAAGAGGGTAGATAT"; // 31 + final String rightRefFlank = "TGGTGCAAATGCCATTTATGCTCCTCTCCACCCATATCC"; // 39 + final String firstRepeat = "GGGGAGCTGTGAAGAATGGAGCCAGTAATTAAATTCACTGAA"; // 42 + final String secondRepeat = "GGGGAGCTGTGAAGAATGGAGCCAGTAATTAAATTCACTGAA"; // 42 + final String pseudoHomology = "GGGCAGCTGTGGA"; // 13 + + + // first test : expansion from 1 unit to 2 units with pseudo-homology + final byte[] fakeRefSeqForComplexExpansionWithPseudoHomology = String.format("%s%s%s%s", leftRefFlank, firstRepeat, pseudoHomology, rightRefFlank).getBytes(); + final byte[] contigSeqForComplexExpansionWithPseudoHomology = String.format("%s%s%s%s%s", leftRefFlank, firstRepeat, secondRepeat, pseudoHomology, rightRefFlank).getBytes(); + AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("20", 312579, 312664), 1 ,86, TextCigarCodec.decode("86M81S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 312610, 312703), 74 ,167, TextCigarCodec.decode("73S94M"), true, 
60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + NovelAdjacencyAndAltHaplotype breakpoints = + new NovelAdjacencyAndAltHaplotype( + new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), + contigSeqForComplexExpansionWithPseudoHomology, b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); + + final byte[] contigSeqForComplexExpansionWithPseudoHomology_reverseStrand = Arrays.copyOf(contigSeqForComplexExpansionWithPseudoHomology, contigSeqForComplexExpansionWithPseudoHomology.length); + SequenceUtil.reverseComplement(contigSeqForComplexExpansionWithPseudoHomology_reverseStrand); + region1 = new AlignmentInterval(new SimpleInterval("20", 312610, 312703), 1 ,94, TextCigarCodec.decode("94M73S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + region2 = new AlignmentInterval(new SimpleInterval("20", 312579, 312664), 82 ,167, TextCigarCodec.decode("81S86M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + breakpoints = new NovelAdjacencyAndAltHaplotype( + new ChimericAlignment(region1, region2, Collections.emptyList(), + "asm000001:tig00001", b37_seqDict), + contigSeqForComplexExpansionWithPseudoHomology_reverseStrand, b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); + + // second test: expansion from 2 units to 3 units without pseudo-homology + final byte[] contigSeqForComplexExpansionNoPseudoHomology = String.format("%s%s%s%s%s", leftRefFlank, firstRepeat, secondRepeat, firstRepeat, rightRefFlank).getBytes(); + region1 = new AlignmentInterval(new SimpleInterval("20", 312579, 312693), 1, 115, TextCigarCodec.decode("115M81S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + region2 = new AlignmentInterval(new SimpleInterval("20", 312610, 312732), 74, 196, TextCigarCodec.decode("73S123M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + breakpoints 
= + new NovelAdjacencyAndAltHaplotype( + new ChimericAlignment(region1, region2, Collections.emptyList(), + "asm000001:tig00001", b37_seqDict), + contigSeqForComplexExpansionNoPseudoHomology, b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); + + final byte[] contigSeqForComplexExpansionNoPseudoHomology_reverseStrand = Arrays.copyOf(contigSeqForComplexExpansionNoPseudoHomology, contigSeqForComplexExpansionNoPseudoHomology.length); + SequenceUtil.reverseComplement(contigSeqForComplexExpansionNoPseudoHomology_reverseStrand); + region1 = new AlignmentInterval(new SimpleInterval("20", 312610, 312732), 1, 123, TextCigarCodec.decode("123M73S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + region2 = new AlignmentInterval(new SimpleInterval("20", 312579, 312693), 82, 196, TextCigarCodec.decode("81S115M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + breakpoints = new NovelAdjacencyAndAltHaplotype( + new ChimericAlignment(region1, region2, Collections.emptyList(), + "asm000001:tig00001", b37_seqDict), + contigSeqForComplexExpansionNoPseudoHomology_reverseStrand, b37_seqDict); + result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); + + return result; + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVTypeUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVTypeUnitTest.java index 69b4bd3d5bd..01267d5a16d 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVTypeUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVTypeUnitTest.java @@ -94,12 +94,12 @@ private Object[][] forAltAlleleSvLenAndIdProductions_stable() { DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING}); // simple tandem dup expansion from 1 unit to 2 units - data.add(new Object[]{forSimpleTanDupExpansion_minus.biPathBubble, 
inferSimpleTypeFromNovelAdjacency(forSimpleTanDupExpansion_minus.biPathBubble), + data.add(new Object[]{forSimpleTanDupExpansion_ins_minus.biPathBubble, inferSimpleTypeFromNovelAdjacency(forSimpleTanDupExpansion_ins_minus.biPathBubble), SYMB_ALT_ALLELE_DUP, 10, DUP_TAN_EXPANSION_INTERNAL_ID_START_STRING}); // simple tandem dup expansion from 1 unit to 2 units and novel insertion - data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_plus.biPathBubble, inferSimpleTypeFromNovelAdjacency(forSimpleTanDupExpansionWithNovelIns_plus.biPathBubble), + data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_dup_plus.biPathBubble, inferSimpleTypeFromNovelAdjacency(forSimpleTanDupExpansionWithNovelIns_dup_plus.biPathBubble), SYMB_ALT_ALLELE_DUP, 99, DUP_TAN_EXPANSION_INTERNAL_ID_START_STRING}); @@ -195,13 +195,23 @@ private Object[][] forAltAlleleSvLenAndIdProductions_new() { SYMB_ALT_ALLELE_DEL, -10, DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING}); - // simple tandem dup expansion from 1 unit to 2 units - data.add(new Object[]{forSimpleTanDupExpansion_minus.biPathBubble, forSimpleTanDupExpansion_minus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), - SYMB_ALT_ALLELE_DUP, 10, + // simple tandem dup expansion from 1 unit to 2 units that will be called as insertion + data.add(new Object[]{forSimpleTanDupExpansion_ins_minus.biPathBubble, forSimpleTanDupExpansion_ins_minus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + SYMB_ALT_ALLELE_INS, 10, + SimpleSVType.TYPES.INS.name()}); + + // simple tandem dup expansion from 1 unit to 2 units that will be called as duplication + data.add(new Object[]{forSimpleTanDupExpansion_dup_minus.biPathBubble, forSimpleTanDupExpansion_dup_minus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + SYMB_ALT_ALLELE_DUP, 55, DUP_TAN_EXPANSION_INTERNAL_ID_START_STRING}); - // simple tandem dup expansion from 1 unit to 2 units and novel insertion - data.add(new 
Object[]{forSimpleTanDupExpansionWithNovelIns_plus.biPathBubble, forSimpleTanDupExpansionWithNovelIns_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + // simple tandem dup expansion from 1 unit to 2 units and novel insertion that will be called as insertion + data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_ins_plus.biPathBubble, forSimpleTanDupExpansionWithNovelIns_ins_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + SYMB_ALT_ALLELE_INS, 326, + SimpleSVType.TYPES.INS.name()}); + + // simple tandem dup expansion from 1 unit to 2 units and novel insertion that will be called as duplication + data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_dup_plus.biPathBubble, forSimpleTanDupExpansionWithNovelIns_dup_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), SYMB_ALT_ALLELE_DUP, 99, DUP_TAN_EXPANSION_INTERNAL_ID_START_STRING}); @@ -226,6 +236,15 @@ private Object[][] forAltAlleleSvLenAndIdProductions_new() { SYMB_ALT_ALLELE_DUP, 96, DUP_TAN_EXPANSION_INTERNAL_ID_START_STRING}); + // short tandem dup expansion from 1 unit to 2 units with pseudo-homology + data.add(new Object[]{forComplexTanDup_1to2_short_pseudoHom_plus.biPathBubble, forComplexTanDup_1to2_short_pseudoHom_plus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + SYMB_ALT_ALLELE_INS, 42, + SimpleSVType.TYPES.INS.name()}); + // short tandem dup expansion from 2 units to 3 units + data.add(new Object[]{forComplexTanDup_2to3_short_noPseudoHom_minus.biPathBubble, forComplexTanDup_2to3_short_noPseudoHom_minus.biPathBubble.toSimpleOrBNDTypes(b37_reference, b37_seqDict).get(0), + SYMB_ALT_ALLELE_INS, 42, + SimpleSVType.TYPES.INS.name()}); + return data.toArray(new Object[data.size()][]); } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ChimericAlignmentUnitTest.java 
b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ChimericAlignmentUnitTest.java index d424c291bab..6d6f411ab5c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ChimericAlignmentUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ChimericAlignmentUnitTest.java @@ -85,17 +85,17 @@ static List> result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); // tandem duplication simple expansion - testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansion_plus; + testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansion_ins_plus; result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); - testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansion_minus; + testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansion_ins_minus; result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); // tandem duplication simple expansion with novel insertion - testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansionWithNovelIns_plus; + testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansionWithNovelIns_dup_plus; result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); - testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansionWithNovelIns_minus; + testData = SimpleSVDiscoveryTestDataProvider.forSimpleTanDupExpansionWithNovelIns_dup_minus; result.add(new Tuple3<>(testData.firstAlignment, testData.secondAlignment, SimpleSVDiscoveryTestDataProvider.b37_seqDict)); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java 
b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java index d9cc0a4baa2..8414d600631 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java @@ -327,8 +327,8 @@ public void testGetBreakpoints_tandemDuplication_contraction_simple() { @Test(groups = "sv") public void testGetBreakpoints_tandemDuplication_expansion_simple() { - final NovelAdjacencyAndAltHaplotype breakpoints = forSimpleTanDupExpansion_plus.biPathBubble; - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = forSimpleTanDupExpansion_minus.biPathBubble; + final NovelAdjacencyAndAltHaplotype breakpoints = forSimpleTanDupExpansion_ins_plus.biPathBubble; + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = forSimpleTanDupExpansion_ins_minus.biPathBubble; seeIfItWorksForNonSimpleTranslocations(breakpoints, StrandSwitch.NO_SWITCH, new SimpleInterval("21", 100040, 100040), new SimpleInterval("21", 100040, 100040), new SimpleInterval("21", 100041, 100050), @@ -348,8 +348,8 @@ public void testGetBreakpoints_tandemDuplication_expansion_andNovelInsertion() { final String dup = "AAAAGTAAATGTTATAAGAAATCTTAAGTATTATTTTCTTATGTTTCTAGCCTAATAAAGTGCTTTTATTAAAGCACTTTATTTAAAGG"; //89 final String alt = dup + insertedSeq + dup; - final NovelAdjacencyAndAltHaplotype breakpoints = forSimpleTanDupExpansionWithNovelIns_plus.biPathBubble; - final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = forSimpleTanDupExpansionWithNovelIns_minus.biPathBubble; + final NovelAdjacencyAndAltHaplotype breakpoints = forSimpleTanDupExpansionWithNovelIns_dup_plus.biPathBubble; + final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = 
forSimpleTanDupExpansionWithNovelIns_dup_minus.biPathBubble; seeIfItWorksForNonSimpleTranslocations(breakpoints, StrandSwitch.NO_SWITCH, new SimpleInterval("21", 25297163, 25297163), new SimpleInterval("21", 25297163, 25297163), new SimpleInterval("21", 25297164,25297252), @@ -562,11 +562,15 @@ private Object[][] forTypeInference() { Collections.singletonList( new Tuple2<>(DEL.name(), ImmutableSet.of(DUP_TAN_CONTRACTION_STRING)) )}); // simple tandem dup expansion from 1 unit to 2 units - data.add(new Object[]{forSimpleTanDupExpansion_minus.biPathBubble, + data.add(new Object[]{forSimpleTanDupExpansion_ins_minus.biPathBubble, + Collections.singletonList( new Tuple2<>(INS.name(), defaultKeys) )}); + data.add(new Object[]{forSimpleTanDupExpansion_dup_minus.biPathBubble, Collections.singletonList( new Tuple2<>(DUP.name(), ImmutableSet.of(DUP_TAN_EXPANSION_STRING)) )}); // simple tandem dup expansion from 1 unit to 2 units and novel insertion - data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_plus.biPathBubble, + data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_ins_plus.biPathBubble, + Collections.singletonList( new Tuple2<>(INS.name(), defaultKeys) )}); + data.add(new Object[]{forSimpleTanDupExpansionWithNovelIns_dup_plus.biPathBubble, Collections.singletonList( new Tuple2<>(DUP.name(), ImmutableSet.of(DUP_TAN_EXPANSION_STRING)) )}); // tandem dup expansion from 1 unit to 2 units with pseudo-homology @@ -585,6 +589,13 @@ private Object[][] forTypeInference() { data.add(new Object[]{forComplexTanDup_2to3_noPseudoHom_plus.biPathBubble, Collections.singletonList( new Tuple2<>(DUP.name(), ImmutableSet.of(DUP_TAN_EXPANSION_STRING)) )}); + // short tandem dup expansion from 1 unit to 2 units with pseudo-homology + data.add(new Object[]{forComplexTanDup_1to2_short_pseudoHom_plus.biPathBubble, + Collections.singletonList( new Tuple2<>(INS.name(), defaultKeys) )}); + // short tandem dup expansion from 2 units to 3 units + data.add(new 
Object[]{forComplexTanDup_2to3_short_noPseudoHom_minus.biPathBubble, + Collections.singletonList( new Tuple2<>(INS.name(), defaultKeys) )}); + return data.toArray(new Object[data.size()][]); } } \ No newline at end of file