Skip to content

Commit

Permalink
responded to some things and found that things don't compile?
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesemery committed Feb 3, 2022
1 parent 26b1ed6 commit b234ee3
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ public AssemblyRegionIterator(final MultiIntervalShard<GATKRead> readShard,
this.readCachingIterator = new ReadCachingIterator(readShard.iterator());
this.readCache = new ArrayDeque<>();
this.activityProfile = new BandPassActivityProfile(assemblyRegionArgs.maxProbPropagationDistance, assemblyRegionArgs.activeProbThreshold, BandPassActivityProfile.MAX_FILTER_SIZE, BandPassActivityProfile.DEFAULT_SIGMA, readHeader);
// TODO: AH & BG handle changing contig
this.pendingAlignmentData = new ArrayDeque<>();

// We wrap our LocusIteratorByState inside an IntervalAlignmentContextIterator so that we get empty loci
Expand Down Expand Up @@ -118,7 +117,6 @@ public AssemblyRegion next() {

private AssemblyRegion loadNextAssemblyRegion() {
AssemblyRegion nextRegion = null;
List<AlignmentAndReferenceContext> alignmentData = new ArrayList<>();

while ( locusIterator.hasNext() && nextRegion == null ) {
final AlignmentContext pileup = locusIterator.next();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,17 @@ public abstract class AssemblyBasedCallerArgumentCollection {
public static final String SMITH_WATERMAN_READ_TO_HAPLOTYPE_GAP_OPEN_PENALTY_LONG_NAME = "smith-waterman-read-to-haplotype-gap-open-penalty";
public static final String SMITH_WATERMAN_READ_TO_HAPLOTYPE_GAP_EXTEND_PENALTY_LONG_NAME = "smith-waterman-read-to-haplotype-gap-extend-penalty";

/** See documentation at {@link SmithWatermanAlignmentConstants#STANDARD_NGS}. */
/**
* See documentation at {@link SmithWatermanAlignmentConstants#STANDARD_NGS}.
*/
private static final SWParameters DEFAULT_DANGLING_END_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.STANDARD_NGS;
/** See documentation at {@link SmithWatermanAlignmentConstants#NEW_SW_PARAMETERS}. */
/**
* See documentation at {@link SmithWatermanAlignmentConstants#NEW_SW_PARAMETERS}.
*/
private static final SWParameters DEFAULT_HAPLOTYPE_TO_REFERENCE_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.NEW_SW_PARAMETERS;
/** See documentation at {@link SmithWatermanAlignmentConstants#ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS}. */
/**
* See documentation at {@link SmithWatermanAlignmentConstants#ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS}.
*/
private static final SWParameters DEFAULT_READ_TO_HAPLOTYPE_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS;

public ReadThreadingAssembler createReadThreadingAssembler() {
Expand All @@ -79,42 +85,41 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
* The assembled haplotypes and locally realigned reads will be written as BAM to this file if requested. Really
* for debugging purposes only. Note that the output here does not include uninformative reads so that not every
* input read is emitted to the bam.
*
* <p>
* Turning on this mode may result in serious performance cost for the caller. It's really only appropriate to
* use in specific areas where you want to better understand why the caller is making specific calls.
*
* <p>
* The reads are written out containing an "HC" tag (integer) that encodes which haplotype each read best matches
* according to the haplotype caller's likelihood calculation. The use of this tag is primarily intended
* to allow good coloring of reads in IGV. Simply go to "Color Alignments By > Tag" and enter "HC" to more
* easily see which reads go with these haplotype.
*
* <p>
* Note that the haplotypes (called or all, depending on mode) are emitted as single reads covering the entire
* active region, coming from sample "HC" and a special read group called "ArtificialHaplotype". This will increase the
* pileup depth compared to what would be expected from the reads only, especially in complex regions.
*
* <p>
* Note also that only reads that are actually informative about the haplotypes are emitted. By informative we mean
* that there's a meaningful difference in the likelihood of the read coming from one haplotype compared to
* its next best haplotype.
*
* <p>
* If multiple BAMs are passed as input to the tool (as is common for M2), then they will be combined in the bamout
* output and tagged with the appropriate sample names.
*
* <p>
* The best way to visualize the output of this mode is with IGV. Tell IGV to color the alignments by tag,
* and give it the "HC" tag, so you can see which reads support each haplotype. Finally, you can tell IGV
* to group by sample, which will separate the potential haplotypes from the reads. All of this can be seen in
* <a href="https://www.dropbox.com/s/xvy7sbxpf13x5bp/haplotypecaller%20bamout%20for%20docs.png">this screenshot</a>
*
*/
@Advanced
@Argument(fullName= BAM_OUTPUT_LONG_NAME, shortName= BAM_OUTPUT_SHORT_NAME, doc="File to which assembled haplotypes should be written", optional = true)
@Argument(fullName = BAM_OUTPUT_LONG_NAME, shortName = BAM_OUTPUT_SHORT_NAME, doc = "File to which assembled haplotypes should be written", optional = true)
public String bamOutputPath = null;

/**
* The type of BAM output we want to see. This determines whether HC will write out all of the haplotypes it
* considered (top 128 max) or just the ones that were selected as alleles and assigned to samples.
*/
@Advanced
@Argument(fullName= BAM_WRITER_TYPE_LONG_NAME, doc="Which haplotypes should be written to the BAM", optional = true)
@Argument(fullName = BAM_WRITER_TYPE_LONG_NAME, doc = "Which haplotypes should be written to the BAM", optional = true)
public HaplotypeBAMWriter.WriterType bamWriterType = HaplotypeBAMWriter.WriterType.CALLED_HAPLOTYPES;

// -----------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -145,7 +150,7 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
* For Mutect2, this is a BETA feature that functions similarly to the HaplotypeCaller reference confidence/GVCF mode.
*/
@Advanced
@Argument(fullName= EMIT_REF_CONFIDENCE_LONG_NAME, shortName= EMIT_REF_CONFIDENCE_SHORT_NAME, doc="Mode for emitting reference confidence scores (For Mutect2, this is a BETA feature)", optional = true)
@Argument(fullName = EMIT_REF_CONFIDENCE_LONG_NAME, shortName = EMIT_REF_CONFIDENCE_SHORT_NAME, doc = "Mode for emitting reference confidence scores (For Mutect2, this is a BETA feature)", optional = true)
public ReferenceConfidenceMode emitReferenceConfidence = ReferenceConfidenceMode.NONE;

protected abstract int getDefaultMaxMnpDistance();
Expand All @@ -158,7 +163,7 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
doc = "Two or more phased substitutions separated by this distance or less are merged into MNPs.", optional = true)
public int maxMnpDistance = getDefaultMaxMnpDistance();

@Argument(fullName= FORCE_CALL_ALLELES_LONG_NAME, doc="The set of alleles to force-call regardless of evidence", optional=true)
@Argument(fullName = FORCE_CALL_ALLELES_LONG_NAME, doc = "The set of alleles to force-call regardless of evidence", optional = true)
public FeatureInput<VariantContext> alleles;

@Advanced
Expand All @@ -178,10 +183,10 @@ public ReadThreadingAssembler createReadThreadingAssembler() {

/**
* Enables pileup-based haplotype creation and variant detection
*
* <p>
* NOTE: --pileup-detection is a beta feature. Use this mode at your own risk.
*/
@Argument(fullName= PILEUP_DETECTION_LONG_NAME, doc = "If enabled, the variant caller will create pileup-based haplotypes in addition to the assembly-based haplotype generation.", optional = true)
@Argument(fullName = PILEUP_DETECTION_LONG_NAME, doc = "If enabled, the variant caller will create pileup-based haplotypes in addition to the assembly-based haplotype generation.", optional = true)
public boolean usePileupDetection = false;


Expand Down Expand Up @@ -303,4 +308,5 @@ public SWParameters getReadToHaplotypeSWParameters() {
smithWatermanReadToHaplotypeMismatchPenalty,
smithWatermanReadToHaplotypeGapOpenPenalty,
smithWatermanReadToHaplotypeGapExtendPenalty);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region,
}

if (!forcedPileupAlleles.isEmpty()) {
processPileupAlleles(region, forcedPileupAlleles, argumentCollection.pileupDetectionArgs.snpAdajacentToAssemblyIndel, argumentCollection.maxMnpDistance, aligner, refHaplotype, assemblyResultSet);
processPileupAlleles(region, forcedPileupAlleles, argumentCollection.pileupDetectionArgs.snpAdajacentToAssemblyIndel, argumentCollection.maxMnpDistance, aligner, refHaplotype, assemblyResultSet, argumentCollection.getHaplotypeToReferenceSWParameters());
}


Expand All @@ -343,18 +343,11 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region,

/**
* Handle pileup detected alternate alleles.
* @param region
* @param givenAlleles
* @param maxMnpDistance
* @param snpAdjacentToIndelLimit
* @param aligner
* @param refHaplotype
* @param assemblyResultSet
*/
@VisibleForTesting
static void processPileupAlleles(final AssemblyRegion region, final List<VariantContext> givenAlleles, final int maxMnpDistance,
final int snpAdjacentToIndelLimit, final SmithWatermanAligner aligner, final Haplotype refHaplotype,
final AssemblyResultSet assemblyResultSet) {
final AssemblyResultSet assemblyResultSet, final SWParameters haplotypeToReferenceSWParameters) {
final int assemblyRegionStart = region.getPaddedSpan().getStart();
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
final Map<Integer, VariantContext> assembledVariants = assemblyResultSet.getVariationEvents(maxMnpDistance).stream()
Expand All @@ -363,7 +356,6 @@ static void processPileupAlleles(final AssemblyRegion region, final List<Variant
.collect(Collectors.groupingBy(VariantContext::getStart, Collectors.collectingAndThen(Collectors.toList(), AssemblyBasedCallerUtils::makeMergedVariantContext))).values();

Set<Haplotype> baseHaplotypes = new TreeSet<>();
//BG Testing assembledAndNewHaplotypes.addAll(assemblyResultSet.getHaplotypeList());
//Todo BG testing to see if limiting the initial list of haplotypes resolves the issue of variants not getting called in clustered regions
baseHaplotypes.addAll(assemblyResultSet.getHaplotypeList().stream()
.sorted(Comparator.comparingInt((Haplotype hap) -> hap.isReference() ? 1 : 0).thenComparingDouble(hap -> hap.getScore()).reversed())
Expand Down Expand Up @@ -405,7 +397,7 @@ static void processPileupAlleles(final AssemblyRegion region, final List<Variant
// BG & AH this is the right way to insert a new haplotype
final Haplotype insertedHaplotype = baseHaplotype.insertAllele(longerRef, givenAllele, activeRegionStart + givenVC.getStart() - assemblyRegionStart, givenVC.getStart());
if (insertedHaplotype != null) { // can be null if the requested allele can't be inserted into the haplotype
final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), insertedHaplotype.getBases(), aligner, SWOverhangStrategy.INDEL);
final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), insertedHaplotype.getBases(), aligner, haplotypeToReferenceSWParameters, SWOverhangStrategy.INDEL);
insertedHaplotype.setCigar(cigar);
insertedHaplotype.setGenomeLocation(refHaplotype.getGenomeLocation());
insertedHaplotype.setAlignmentStartHapwrtRef(activeRegionStart);
Expand Down Expand Up @@ -485,7 +477,15 @@ static void addGivenAlleles(final int assemblyRegionStart, final List<VariantCon
}



/**
 * Returns a map of kmer -> count of total unique occurrences across all of the provided reads. This is a necessary step
 * in the {@link AssemblyBasedCallerUtils#processPileupAlleles(AssemblyRegion, List, int, int, SmithWatermanAligner, Haplotype, AssemblyResultSet, SWParameters)} pileup
 * haplotype filtering.
 *
 * @param hardClippedPileupReads Reads to scan to generate kmer counts from
 * @param kmerSize kmer size to use in kmerizing the reads
 * @return a map of kmer to the number of occurrences in the data.
 */
static Map<Kmer, Integer> getKmerReadCounts(final List<GATKRead> hardClippedPileupReads, int kmerSize) {
Map<Kmer, Integer> kmerReadCounts = new HashMap<>();
hardClippedPileupReads.forEach(read -> kmerizeAndCountOccurences(read.getBases(), kmerSize, kmerReadCounts));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ public final class Haplotype extends Allele {
private Cigar cigar;
private int alignmentStartHapwrtRef;
private double score = Double.NaN;
// private List<VariantContext> variantContextList = new ArrayList<>();

// debug information for tracking kmer sizes used in graph construction for debug output
private int kmerSize = 0;
Expand Down Expand Up @@ -216,7 +215,6 @@ public void setCigar( final Cigar cigar ) {
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation, final int genomicInsertLocation ) {
// refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
final Pair<Integer, CigarOperator> haplotypeInsertLocationAndOperator = ReadUtils.getReadIndexForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation);
// final Pair<Integer, CigarOperator> haplotypeInsertLocationAndOperator = ReadUtils.getReadIndexForReferenceCoordinate(genomeLocation.getStart(), cigar, refInsertLocation);

// can't insert outside the haplotype or into a deletion
if( haplotypeInsertLocationAndOperator.getLeft() == ReadUtils.READ_INDEX_NOT_FOUND || !haplotypeInsertLocationAndOperator.getRight().consumesReadBases() ) {
Expand All @@ -234,9 +232,7 @@ public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, f
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, 0, haplotypeInsertLocation)); // bases before the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, haplotypeInsertLocation + refAllele.length(), myBases.length)); // bases after the variant
Haplotype newHaplotype = new Haplotype(newHaplotypeBases);
// newHaplotype.setVariantContextList(this.getVariantContextList());
return newHaplotype;
return new Haplotype(newHaplotypeBases);
}

/**
Expand Down Expand Up @@ -274,18 +270,4 @@ public int getKmerSize() {
/**
 * Sets the kmer size recorded for this haplotype.
 *
 * <p>Per the field's declaration, this is debug information tracking the kmer size
 * used in graph construction for debug output — it does not affect the haplotype's
 * bases, cigar, or scoring.
 *
 * @param kmerSize kmer size used when this haplotype was assembled
 */
public void setKmerSize(int kmerSize) {
this.kmerSize = kmerSize;
}


//
// public void setVariantContextList(List<VariantContext> variantContextList) {
// this.variantContextList.addAll(variantContextList);
// }
//
// public List<VariantContext> getVariantContextList() {
// return variantContextList;
// }
//
// public void addVariantContext(VariantContext variantContext) {
// this.variantContextList.add(variantContext);
// }
}
Original file line number Diff line number Diff line change
Expand Up @@ -1648,7 +1648,7 @@ public void testPileupCallingDefaultsConsistentWithPastResults(final String inpu
* Test that in VCF mode we're consistent with past GATK4 results
*/
@Test(dataProvider="HaplotypeCallerTestInputs")
public void testPileupCallingDRAGENModeConsistentWithPastREsultsResults(final String inputFileName, final String referenceFileName) throws Exception {
public void testPileupCallingDRAGENModeConsistentWithPastResults(final String inputFileName, final String referenceFileName) throws Exception {
Utils.resetRandomGenerator();

final File output = createTempFile("testVCFModeIsConsistentWithPastResults", ".vcf");
Expand Down

0 comments on commit b234ee3

Please sign in to comment.