Skip to content

Commit

Permalink
M2 doesn't use very short stubs of clipped reads for genotyping (#5057)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin authored Jul 30, 2018
1 parent 2347646 commit b6a630a
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ public final class Mutect2Engine implements AssemblyRegionEvaluator {
public static final int MAX_NORMAL_QUAL_SUM = 100;
public static final int MIN_PALINDROME_SIZE = 5;

// after trimming to fit the assembly window, throw away read stubs shorter than this length
// if we don't, the several bases left of reads that end just within the assembly window can
// get realigned incorrectly. See https://github.com/broadinstitute/gatk/issues/5060
public static final int MINIMUM_READ_LENGTH_AFTER_TRIMMING = 10;

private M2ArgumentCollection MTAC;
private SAMFileHeader header;

Expand Down Expand Up @@ -193,6 +198,9 @@ public List<VariantContext> callRegion(final AssemblyRegion originalAssemblyRegi
}

final AssemblyRegion regionForGenotyping = assemblyResult.getRegionForGenotyping();
final List<GATKRead> readStubs = regionForGenotyping.getReads().stream()
.filter(r -> r.getLength() < MINIMUM_READ_LENGTH_AFTER_TRIMMING).collect(Collectors.toList());
regionForGenotyping.removeAll(readStubs);

final Map<String,List<GATKRead>> reads = splitReadsBySample( regionForGenotyping.getReads() );

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ public class Mutect2IntegrationTest extends CommandLineProgramTest {
private static final File NO_CONTAMINATION_TABLE = new File(toolsTestDir, "mutect/no-contamination.table");
private static final File FIVE_PCT_CONTAMINATION_TABLE = new File(toolsTestDir, "mutect/five-pct-contamination.table");
private static final File TEN_PCT_CONTAMINATION_TABLE = new File(toolsTestDir, "mutect/ten-pct-contamination.table");

private static final File NA12878_MITO_BAM = new File(toolsTestDir, "mutect/mito/NA12878.bam");
private static final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta");

/**
* Several DREAM challenge bams with synthetic truth data. In order to keep file sizes manageable, bams are restricted
* to chromosome 20, leaving ~100-200 variants, and then further restricted to 400-bp intervals centered around
Expand Down Expand Up @@ -466,6 +470,35 @@ public void testMinBaseQualityScore() throws Exception {
}
}

// basic test on a small chunk of NA12878 mitochondria. This is not a validation, but rather a sanity check
// that M2 makes obvious calls, doesn't trip up on the beginning of the circular chromosome, and can handle high depth
@Test
public void testMitochondria() throws Exception {
Utils.resetRandomGenerator();
final File unfilteredVcf = createTempFile("unfiltered", ".vcf");

final List<String> args = Arrays.asList("-I", NA12878_MITO_BAM.getAbsolutePath(),
"-" + M2ArgumentCollection.TUMOR_SAMPLE_SHORT_NAME, "NA12878",
"-R", MITO_REF.getAbsolutePath(),
"-L", "chrM:1-1000",
"-min-pruning", "5",
"-O", unfilteredVcf.getAbsolutePath());
runCommandLine(args);


final List<VariantContext> variants = VariantContextTestUtils.streamVcf(unfilteredVcf).collect(Collectors.toList());
final Set<String> variantKeys = variants.stream().map(vc -> keyForVariant(vc)).collect(Collectors.toSet());

final List<String> expectedKeys = Arrays.asList(
"chrM:152-152 [T*, C]",
"chrM:263-263 [A*, G]",
"chrM:301-301 [A*, AC]",
"chrM:302-302 [A*, AC, C, ACC]",
"chrM:310-310 [T*, TC]",
"chrM:750-750 [A*, G]");
Assert.assertTrue(expectedKeys.stream().allMatch(variantKeys::contains));
}



@DataProvider(name="bamoutVariations")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
@HD VN:1.5
@SQ SN:chrM LN:16569 M5:c68f52674c9fb33aef52dcf399755519 UR:file:/humgen/gsa-hpprojects/dev/mshand/SpecOps/Mitochondria/MitochondriaOnlyFastas/Homo_sapiens_assembly38.mt_only.fasta
Loading

0 comments on commit b6a630a

Please sign in to comment.