Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better default AF for M2 tumor-normal mode; improved M2 STR filter #4690

Merged
merged 2 commits into from
Apr 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection
public static final String MAX_SUSPICIOUS_READS_PER_ALIGNMENT_START_LONG_NAME = "max-suspicious-reads-per-alignment-start";
public static final String NORMAL_LOD_LONG_NAME = "normal-lod";


public static final double DEFAULT_AF_FOR_TUMOR_ONLY_CALLING = 5e-8;
public static final double DEFAULT_AF_FOR_TUMOR_NORMAL_CALLING = 1e-5;

//TODO: HACK ALERT HACK ALERT HACK ALERT
//TODO: GATK4 does not yet have a way to tag inputs, eg -I:tumor tumor.bam -I:normal normal.bam,
//TODO: so for now we require the user to specify bams *both* as inputs, with -I tumor.bam -I normal.bam
Expand Down Expand Up @@ -86,7 +90,12 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection
@Argument(fullName= DEFAULT_AF_LONG_NAME, shortName = DEFAULT_AF_SHORT_NAME,
doc="Population allele fraction assigned to alleles not found in germline resource. Please see docs/mutect/mutect2.pdf for" +
"a derivation of the default value.", optional = true)
public double afOfAllelesNotInGermlineResource = 5e-8;
public double afOfAllelesNotInGermlineResource = -1;

public double getDefaultAlleleFrequency() {
return afOfAllelesNotInGermlineResource >= 0 ? afOfAllelesNotInGermlineResource :
(normalSampleName == null ? DEFAULT_AF_FOR_TUMOR_ONLY_CALLING : DEFAULT_AF_FOR_TUMOR_NORMAL_CALLING);
}

/**
* Only variants with tumor LODs exceeding this threshold will be written to the VCF, regardless of filter status.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentColl
public static final String NORMAL_ARTIFACT_LOD_LONG_NAME = "normal-artifact-lod";
public static final String MAX_GERMLINE_POSTERIOR_LONG_NAME = "max-germline-posterior";
public static final String MAX_ALT_ALLELE_COUNT_LONG_NAME = "max-alt-allele-count";
public static final String MIN_BASES_TO_SUSPECT_PCR_SLIPPAGE_LONG_NAME = "min-pcr-slippage-size";
public static final String PCR_SLIPPAGE_RATE_LONG_NAME = "pcr-slippage-rate";
public static final String PCR_SLIPPAGE_P_VALUE_LONG_NAME = "pcr-slippage-p-value";
public static final String MIN_MEDIAN_MAPPING_QUALITY_LONG_NAME = "min-median-mapping-quality";
public static final String MIN_MEDIAN_BASE_QUALITY_LONG_NAME = "min-median-base-quality";
public static final String MAX_MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_LONG_NAME = "max-median-fragment-length-difference";
Expand Down Expand Up @@ -64,6 +67,15 @@ public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentColl
@Argument(fullName = MAX_ALT_ALLELE_COUNT_LONG_NAME, optional = true, doc = "filter variants with too many alt alleles")
public int numAltAllelesThreshold = 1;

@Argument(fullName = MIN_BASES_TO_SUSPECT_PCR_SLIPPAGE_LONG_NAME, optional = true, doc = "Minimum number of reference bases in an STR to suspect PCR slippage")
public int minPcrSlippageBases = 8;

@Argument(fullName = PCR_SLIPPAGE_RATE_LONG_NAME, optional = true, doc = "In contexts where PCR slippage is suspected, the rough fraction of reads in which slippage occurs")
public double pcrSlippageRate = 0.1;

@Argument(fullName = PCR_SLIPPAGE_P_VALUE_LONG_NAME, optional = true, doc = "P-value threshold for PCR slippage")
public double pcrSlippagePValueThreshold = 0.001;

@Argument(fullName = MIN_MEDIAN_MAPPING_QUALITY_LONG_NAME, optional = true, doc="filter variants for which alt reads' median mapping quality is too low.")
public int minMedianMappingQuality = 30;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.vcf.VCFConstants;
import org.apache.commons.math3.distribution.BinomialDistribution;
import org.broadinstitute.hellbender.tools.walkers.annotator.*;
import org.broadinstitute.hellbender.tools.walkers.contamination.ContaminationRecord;
import org.broadinstitute.hellbender.tools.walkers.contamination.MinorAlleleFractionRecord;
Expand Down Expand Up @@ -74,17 +75,25 @@ private void applyTriallelicFilter(final VariantContext vc, final VariantContext
}
}

private static void applySTRFilter(final VariantContext vc, final VariantContextBuilder vcb) {
private void applySTRFilter(final VariantContext vc, final VariantContextBuilder vcb) {
// STR contractions, such as ACTACTACT -> ACTACT, are overwhelmingly false positives so we hard filter by default
if (vc.isIndel()) {
final int[] rpa = vc.getAttributeAsList(GATKVCFConstants.REPEATS_PER_ALLELE_KEY).stream()
.mapToInt(o -> Integer.parseInt(String.valueOf(o))).toArray();
final String ru = vc.getAttributeAsString(GATKVCFConstants.REPEAT_UNIT_KEY, "");
if (rpa != null && rpa.length > 1 && ru.length() > 1) {
final int refCount = rpa[0];
final int altCount = rpa[1];

if (refCount - altCount == 1) {
if (rpa == null || rpa.length < 2 || ru.length() == 0) {
return;
}
final int referenceSTRBaseCount = ru.length() * rpa[0];
final int numPCRSlips = rpa[0] - rpa[1];
if (referenceSTRBaseCount >= MTFAC.minPcrSlippageBases && numPCRSlips == 1) {
// calculate the p-value that out of n reads we would have at least k slippage reads
// if this p-value is small we keep the variant (reject the PCR slippage hypothesis)
final int[] ADs = vc.getGenotype(tumorSample).getAD();
final int depth = ADs == null ? 0 : (int) MathUtils.sum(ADs);
final double oneSidedPValueOfSlippage = (ADs == null || ADs.length < 2) ? 1.0 :
new BinomialDistribution(null, depth, MTFAC.pcrSlippageRate).cumulativeProbability(ADs[1] - 1, depth);
if (oneSidedPValueOfSlippage > MTFAC.pcrSlippagePValueThreshold) {
vcb.filter(GATKVCFConstants.STR_CONTRACTION_FILTER_NAME);
}
}
Expand Down Expand Up @@ -253,7 +262,6 @@ private void applyDuplicatedAltReadFilter(final M2FiltersArgumentCollection MTFA
}
}

//TODO: building a list via repeated side effects is ugly
public void applyFilters(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final VariantContextBuilder vcb) {
vcb.filters(new HashSet<>());
applyInsufficientEvidenceFilter(MTFAC, vc, vcb);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ public CalledHaplotypes callMutations(
final Optional<LikelihoodMatrix<Allele>> subsettedLog10NormalMatrix =
getForNormal(() -> new SubsettedLikelihoodMatrix<>(log10NormalMatrix.get(), allSomaticAlleles));

final Map<String, Object> populationAlleleFreqeuncyAnnotation = GermlineProbabilityCalculator.getPopulationAlleleFrequencyAnnotation(featureContext.getValues(MTAC.germlineResource, loc), somaticAltAlleles, MTAC.afOfAllelesNotInGermlineResource);
final Map<String, Object> populationAlleleFreqeuncyAnnotation = GermlineProbabilityCalculator.getPopulationAlleleFrequencyAnnotation(featureContext.getValues(MTAC.germlineResource, loc), somaticAltAlleles, MTAC.getDefaultAlleleFrequency());

final VariantContextBuilder callVcb = new VariantContextBuilder(mergedVC)
.alleles(allSomaticAlleles)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,7 @@ public void testDreamTumorNormal(final File tumorBam, final String tumorSample,

// tumor-only calling with gnomAD
if (!tumorOnly) {
args.addAll(Arrays.asList("-I", normalBam.getAbsolutePath(),
"-" + M2ArgumentCollection.NORMAL_SAMPLE_SHORT_NAME, normal,
"-" + M2ArgumentCollection.DEFAULT_AF_SHORT_NAME, "0.000003"));
args.addAll(Arrays.asList("-I", normalBam.getAbsolutePath(), "-" + M2ArgumentCollection.NORMAL_SAMPLE_SHORT_NAME, normal));
};

runCommandLine(args);
Expand Down