Skip to content

Commit

Permalink
responded to some things and found that things don't compile?
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesemery committed Feb 3, 2022
1 parent 26b1ed6 commit b234ee3
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ public AssemblyRegionIterator(final MultiIntervalShard<GATKRead> readShard,
this.readCachingIterator = new ReadCachingIterator(readShard.iterator());
this.readCache = new ArrayDeque<>();
this.activityProfile = new BandPassActivityProfile(assemblyRegionArgs.maxProbPropagationDistance, assemblyRegionArgs.activeProbThreshold, BandPassActivityProfile.MAX_FILTER_SIZE, BandPassActivityProfile.DEFAULT_SIGMA, readHeader);
// TODO: AH & BG handle changing contig
this.pendingAlignmentData = new ArrayDeque<>();

// We wrap our LocusIteratorByState inside an IntervalAlignmentContextIterator so that we get empty loci
Expand Down Expand Up @@ -118,7 +117,6 @@ public AssemblyRegion next() {

private AssemblyRegion loadNextAssemblyRegion() {
AssemblyRegion nextRegion = null;
List<AlignmentAndReferenceContext> alignmentData = new ArrayList<>();

while ( locusIterator.hasNext() && nextRegion == null ) {
final AlignmentContext pileup = locusIterator.next();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,17 @@ public abstract class AssemblyBasedCallerArgumentCollection {
public static final String SMITH_WATERMAN_READ_TO_HAPLOTYPE_GAP_OPEN_PENALTY_LONG_NAME = "smith-waterman-read-to-haplotype-gap-open-penalty";
public static final String SMITH_WATERMAN_READ_TO_HAPLOTYPE_GAP_EXTEND_PENALTY_LONG_NAME = "smith-waterman-read-to-haplotype-gap-extend-penalty";

/** See documentation at {@link SmithWatermanAlignmentConstants#STANDARD_NGS}. */
/**
* See documentation at {@link SmithWatermanAlignmentConstants#STANDARD_NGS}.
*/
private static final SWParameters DEFAULT_DANGLING_END_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.STANDARD_NGS;
/** See documentation at {@link SmithWatermanAlignmentConstants#NEW_SW_PARAMETERS}. */
/**
* See documentation at {@link SmithWatermanAlignmentConstants#NEW_SW_PARAMETERS}.
*/
private static final SWParameters DEFAULT_HAPLOTYPE_TO_REFERENCE_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.NEW_SW_PARAMETERS;
/** See documentation at {@link SmithWatermanAlignmentConstants#ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS}. */
/**
* See documentation at {@link SmithWatermanAlignmentConstants#ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS}.
*/
private static final SWParameters DEFAULT_READ_TO_HAPLOTYPE_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS;

public ReadThreadingAssembler createReadThreadingAssembler() {
Expand All @@ -79,42 +85,41 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
* The assembled haplotypes and locally realigned reads will be written as BAM to this file if requested. Really
* for debugging purposes only. Note that the output here does not include uninformative reads so that not every
* input read is emitted to the bam.
*
* <p>
* Turning on this mode may result in serious performance cost for the caller. It's really only appropriate to
* use in specific areas where you want to better understand why the caller is making specific calls.
*
* <p>
* The reads are written out containing an "HC" tag (integer) that encodes which haplotype each read best matches
* according to the haplotype caller's likelihood calculation. The use of this tag is primarily intended
* to allow good coloring of reads in IGV. Simply go to "Color Alignments By > Tag" and enter "HC" to more
* easily see which reads go with these haplotype.
*
* <p>
* Note that the haplotypes (called or all, depending on mode) are emitted as single reads covering the entire
* active region, coming from sample "HC" and a special read group called "ArtificialHaplotype". This will increase the
* pileup depth compared to what would be expected from the reads only, especially in complex regions.
*
* <p>
* Note also that only reads that are actually informative about the haplotypes are emitted. By informative we mean
* that there's a meaningful difference in the likelihood of the read coming from one haplotype compared to
* its next best haplotype.
*
* <p>
* If multiple BAMs are passed as input to the tool (as is common for M2), then they will be combined in the bamout
* output and tagged with the appropriate sample names.
*
* <p>
* The best way to visualize the output of this mode is with IGV. Tell IGV to color the alignments by tag,
* and give it the "HC" tag, so you can see which reads support each haplotype. Finally, you can tell IGV
* to group by sample, which will separate the potential haplotypes from the reads. All of this can be seen in
* <a href="https://www.dropbox.com/s/xvy7sbxpf13x5bp/haplotypecaller%20bamout%20for%20docs.png">this screenshot</a>
*
*/
@Advanced
@Argument(fullName= BAM_OUTPUT_LONG_NAME, shortName= BAM_OUTPUT_SHORT_NAME, doc="File to which assembled haplotypes should be written", optional = true)
@Argument(fullName = BAM_OUTPUT_LONG_NAME, shortName = BAM_OUTPUT_SHORT_NAME, doc = "File to which assembled haplotypes should be written", optional = true)
public String bamOutputPath = null;

/**
* The type of BAM output we want to see. This determines whether HC will write out all of the haplotypes it
* considered (top 128 max) or just the ones that were selected as alleles and assigned to samples.
*/
@Advanced
@Argument(fullName= BAM_WRITER_TYPE_LONG_NAME, doc="Which haplotypes should be written to the BAM", optional = true)
@Argument(fullName = BAM_WRITER_TYPE_LONG_NAME, doc = "Which haplotypes should be written to the BAM", optional = true)
public HaplotypeBAMWriter.WriterType bamWriterType = HaplotypeBAMWriter.WriterType.CALLED_HAPLOTYPES;

// -----------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -145,7 +150,7 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
* For Mutect2, this is a BETA feature that functions similarly to the HaplotypeCaller reference confidence/GVCF mode.
*/
@Advanced
@Argument(fullName= EMIT_REF_CONFIDENCE_LONG_NAME, shortName= EMIT_REF_CONFIDENCE_SHORT_NAME, doc="Mode for emitting reference confidence scores (For Mutect2, this is a BETA feature)", optional = true)
@Argument(fullName = EMIT_REF_CONFIDENCE_LONG_NAME, shortName = EMIT_REF_CONFIDENCE_SHORT_NAME, doc = "Mode for emitting reference confidence scores (For Mutect2, this is a BETA feature)", optional = true)
public ReferenceConfidenceMode emitReferenceConfidence = ReferenceConfidenceMode.NONE;

protected abstract int getDefaultMaxMnpDistance();
Expand All @@ -158,7 +163,7 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
doc = "Two or more phased substitutions separated by this distance or less are merged into MNPs.", optional = true)
public int maxMnpDistance = getDefaultMaxMnpDistance();

@Argument(fullName= FORCE_CALL_ALLELES_LONG_NAME, doc="The set of alleles to force-call regardless of evidence", optional=true)
@Argument(fullName = FORCE_CALL_ALLELES_LONG_NAME, doc = "The set of alleles to force-call regardless of evidence", optional = true)
public FeatureInput<VariantContext> alleles;

@Advanced
Expand All @@ -178,10 +183,10 @@ public ReadThreadingAssembler createReadThreadingAssembler() {

/**
* Enables pileup-based haplotype creation and variant detection
*
* <p>
* NOTE: --pileup-detection is a beta feature. Use this mode at your own risk.
*/
@Argument(fullName= PILEUP_DETECTION_LONG_NAME, doc = "If enabled, the variant caller will create pileup-based haplotypes in addition to the assembly-based haplotype generation.", optional = true)
@Argument(fullName = PILEUP_DETECTION_LONG_NAME, doc = "If enabled, the variant caller will create pileup-based haplotypes in addition to the assembly-based haplotype generation.", optional = true)
public boolean usePileupDetection = false;


Expand Down Expand Up @@ -303,4 +308,5 @@ public SWParameters getReadToHaplotypeSWParameters() {
smithWatermanReadToHaplotypeMismatchPenalty,
smithWatermanReadToHaplotypeGapOpenPenalty,
smithWatermanReadToHaplotypeGapExtendPenalty);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region,
}

if (!forcedPileupAlleles.isEmpty()) {
processPileupAlleles(region, forcedPileupAlleles, argumentCollection.pileupDetectionArgs.snpAdajacentToAssemblyIndel, argumentCollection.maxMnpDistance, aligner, refHaplotype, assemblyResultSet);
processPileupAlleles(region, forcedPileupAlleles, argumentCollection.pileupDetectionArgs.snpAdajacentToAssemblyIndel, argumentCollection.maxMnpDistance, aligner, refHaplotype, assemblyResultSet, argumentCollection.getHaplotypeToReferenceSWParameters());
}


Expand All @@ -343,18 +343,11 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region,

/**
* Handle pileup detected alternate alleles.
* @param region
* @param givenAlleles
* @param maxMnpDistance
* @param snpAdjacentToIndelLimit
* @param aligner
* @param refHaplotype
* @param assemblyResultSet
*/
@VisibleForTesting
static void processPileupAlleles(final AssemblyRegion region, final List<VariantContext> givenAlleles, final int maxMnpDistance,
final int snpAdjacentToIndelLimit, final SmithWatermanAligner aligner, final Haplotype refHaplotype,
final AssemblyResultSet assemblyResultSet) {
final AssemblyResultSet assemblyResultSet, final SWParameters haplotypeToReferenceSWParameters) {
final int assemblyRegionStart = region.getPaddedSpan().getStart();
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
final Map<Integer, VariantContext> assembledVariants = assemblyResultSet.getVariationEvents(maxMnpDistance).stream()
Expand All @@ -363,7 +356,6 @@ static void processPileupAlleles(final AssemblyRegion region, final List<Variant
.collect(Collectors.groupingBy(VariantContext::getStart, Collectors.collectingAndThen(Collectors.toList(), AssemblyBasedCallerUtils::makeMergedVariantContext))).values();

Set<Haplotype> baseHaplotypes = new TreeSet<>();
//BG Testing assembledAndNewHaplotypes.addAll(assemblyResultSet.getHaplotypeList());
//Todo BG testing to see if limiting the initial list of haplotypes resolves the issue of variants not getting called in clustered regions
baseHaplotypes.addAll(assemblyResultSet.getHaplotypeList().stream()
.sorted(Comparator.comparingInt((Haplotype hap) -> hap.isReference() ? 1 : 0).thenComparingDouble(hap -> hap.getScore()).reversed())
Expand Down Expand Up @@ -405,7 +397,7 @@ static void processPileupAlleles(final AssemblyRegion region, final List<Variant
// BG & AH this is the right way to insert a new haplotype
final Haplotype insertedHaplotype = baseHaplotype.insertAllele(longerRef, givenAllele, activeRegionStart + givenVC.getStart() - assemblyRegionStart, givenVC.getStart());
if (insertedHaplotype != null) { // can be null if the requested allele can't be inserted into the haplotype
final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), insertedHaplotype.getBases(), aligner, SWOverhangStrategy.INDEL);
final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), insertedHaplotype.getBases(), aligner, haplotypeToReferenceSWParameters, SWOverhangStrategy.INDEL);
insertedHaplotype.setCigar(cigar);
insertedHaplotype.setGenomeLocation(refHaplotype.getGenomeLocation());
insertedHaplotype.setAlignmentStartHapwrtRef(activeRegionStart);
Expand Down Expand Up @@ -485,7 +477,15 @@ static void addGivenAlleles(final int assemblyRegionStart, final List<VariantCon
}



/**
 * Returns a map of kmer -> count of total unique occurrences across all of the provided reads. This is a necessary step
 * in the {@link AssemblyBasedCallerUtils#processPileupAlleles(AssemblyRegion, List, int, int, SmithWatermanAligner, Haplotype, AssemblyResultSet, SWParameters)} pileup
 * haplotype filtering.
 *
 * @param hardClippedPileupReads Reads to scan to generate kmer counts from
 * @param kmerSize kmer size to use in kmerizing the reads
 * @return a map of kmer to the number of occurrences in the data.
 */
static Map<Kmer, Integer> getKmerReadCounts(final List<GATKRead> hardClippedPileupReads, int kmerSize) {
Map<Kmer, Integer> kmerReadCounts = new HashMap<>();
hardClippedPileupReads.forEach(read -> kmerizeAndCountOccurences(read.getBases(), kmerSize, kmerReadCounts));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ public final class Haplotype extends Allele {
private Cigar cigar;
private int alignmentStartHapwrtRef;
private double score = Double.NaN;
// private List<VariantContext> variantContextList = new ArrayList<>();

// debug information for tracking kmer sizes used in graph construction for debug output
private int kmerSize = 0;
Expand Down Expand Up @@ -216,7 +215,6 @@ public void setCigar( final Cigar cigar ) {
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation, final int genomicInsertLocation ) {
// refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
final Pair<Integer, CigarOperator> haplotypeInsertLocationAndOperator = ReadUtils.getReadIndexForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation);
// final Pair<Integer, CigarOperator> haplotypeInsertLocationAndOperator = ReadUtils.getReadIndexForReferenceCoordinate(genomeLocation.getStart(), cigar, refInsertLocation);

// can't insert outside the haplotype or into a deletion
if( haplotypeInsertLocationAndOperator.getLeft() == ReadUtils.READ_INDEX_NOT_FOUND || !haplotypeInsertLocationAndOperator.getRight().consumesReadBases() ) {
Expand All @@ -234,9 +232,7 @@ public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, f
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, 0, haplotypeInsertLocation)); // bases before the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, haplotypeInsertLocation + refAllele.length(), myBases.length)); // bases after the variant
Haplotype newHaplotype = new Haplotype(newHaplotypeBases);
// newHaplotype.setVariantContextList(this.getVariantContextList());
return newHaplotype;
return new Haplotype(newHaplotypeBases);
}

/**
Expand Down Expand Up @@ -274,18 +270,4 @@ public int getKmerSize() {
/**
 * Sets the kmer size recorded for this haplotype.
 *
 * <p>Per the field's declaration, this is debug information tracking the kmer size
 * used in graph construction for debug output — it does not affect the haplotype's
 * bases, cigar, or scoring.
 *
 * @param kmerSize kmer size used when this haplotype was assembled
 */
public void setKmerSize(int kmerSize) {
this.kmerSize = kmerSize;
}


//
// public void setVariantContextList(List<VariantContext> variantContextList) {
// this.variantContextList.addAll(variantContextList);
// }
//
// public List<VariantContext> getVariantContextList() {
// return variantContextList;
// }
//
// public void addVariantContext(VariantContext variantContext) {
// this.variantContextList.add(variantContext);
// }
}
Original file line number Diff line number Diff line change
Expand Up @@ -1648,7 +1648,7 @@ public void testPileupCallingDefaultsConsistentWithPastResults(final String inpu
* Test that in VCF mode we're consistent with past GATK4 results
*/
@Test(dataProvider="HaplotypeCallerTestInputs")
public void testPileupCallingDRAGENModeConsistentWithPastREsultsResults(final String inputFileName, final String referenceFileName) throws Exception {
public void testPileupCallingDRAGENModeConsistentWithPastResults(final String inputFileName, final String referenceFileName) throws Exception {
Utils.resetRandomGenerator();

final File output = createTempFile("testVCFModeIsConsistentWithPastResults", ".vcf");
Expand Down

0 comments on commit b234ee3

Please sign in to comment.