-
Notifications
You must be signed in to change notification settings - Fork 597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Remove hardcoded limit on max homopolymer call #8088
Changes from all commits
d8ad636
8667f2f
6ec0945
cdbe57e
498673e
36d4821
097ee71
fce3071
5a1674e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -4,6 +4,7 @@ | |||||
import htsjdk.samtools.Cigar; | ||||||
import htsjdk.samtools.SAMFileHeader; | ||||||
import htsjdk.samtools.SAMFileWriter; | ||||||
import htsjdk.samtools.SAMReadGroupRecord; | ||||||
import htsjdk.samtools.reference.ReferenceSequenceFile; | ||||||
import htsjdk.samtools.util.Locatable; | ||||||
import htsjdk.variant.variantcontext.*; | ||||||
|
@@ -52,6 +53,7 @@ | |||||
public final class AssemblyBasedCallerUtils { | ||||||
|
||||||
public static final int REFERENCE_PADDING_FOR_ASSEMBLY = 500; | ||||||
public static final int DETERMINE_COLLAPSE_THRESHOLD = -1; | ||||||
public static final int NUM_HAPLOTYPES_TO_INJECT_FORCE_CALLING_ALLELES_INTO = 5; | ||||||
public static final String SUPPORTED_ALLELES_TAG="XA"; | ||||||
public static final String CALLABLE_REGION_TAG = "CR"; | ||||||
|
@@ -363,8 +365,12 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region, | |||||
final SWParameters haplotypeToReferenceSWParameters = argumentCollection.getHaplotypeToReferenceSWParameters(); | ||||||
|
||||||
// establish reference mapper, if needed | ||||||
final LongHomopolymerHaplotypeCollapsingEngine haplotypeCollapsing = (argumentCollection.flowAssemblyCollapseHKerSize > 0 && LongHomopolymerHaplotypeCollapsingEngine.needsCollapsing(refHaplotype.getBases(), argumentCollection.flowAssemblyCollapseHKerSize, logger)) | ||||||
? new LongHomopolymerHaplotypeCollapsingEngine(argumentCollection.flowAssemblyCollapseHKerSize, argumentCollection.flowAssemblyCollapsePartialMode, fullReferenceWithPadding, | ||||||
int collapseHmerSize = argumentCollection.flowAssemblyCollapseHKerSize; | ||||||
if (collapseHmerSize == DETERMINE_COLLAPSE_THRESHOLD){ | ||||||
collapseHmerSize = AssemblyBasedCallerUtils.determineFlowAssemblyColapseHmer(header); | ||||||
} | ||||||
final LongHomopolymerHaplotypeCollapsingEngine haplotypeCollapsing = ( collapseHmerSize > 0 && LongHomopolymerHaplotypeCollapsingEngine.needsCollapsing(refHaplotype.getBases(), collapseHmerSize, logger)) | ||||||
? new LongHomopolymerHaplotypeCollapsingEngine(collapseHmerSize, argumentCollection.flowAssemblyCollapsePartialMode, fullReferenceWithPadding, | ||||||
paddedReferenceLoc, logger, argumentCollection.assemblerArgs.debugAssembly, aligner, argumentCollection.getHaplotypeToReferenceSWParameters()) | ||||||
: null; | ||||||
if ( haplotypeCollapsing != null ) { | ||||||
|
@@ -411,6 +417,18 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region, | |||||
} | ||||||
} | ||||||
|
||||||
private static int determineFlowAssemblyColapseHmer(SAMFileHeader readsHeader) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed! |
||||||
int result = 0; | ||||||
List<SAMReadGroupRecord> rgr = readsHeader.getReadGroups(); | ||||||
for (SAMReadGroupRecord rg : rgr) { | ||||||
FlowBasedReadUtils.ReadGroupInfo rgi = new FlowBasedReadUtils.ReadGroupInfo(rg); | ||||||
if (rgi.maxClass >= result) { | ||||||
result = rgi.maxClass; | ||||||
} | ||||||
} | ||||||
return result; | ||||||
} | ||||||
|
||||||
/** | ||||||
* Handle pileup detected alternate alleles. | ||||||
*/ | ||||||
|
@@ -1217,4 +1235,5 @@ private static GATKRead revertSoftClippedBases(GATKRead inputRead){ | |||||
result.setAttribute(ReferenceConfidenceModel.ORIGINAL_SOFTCLIP_END_TAG, softEnd); | ||||||
return result; | ||||||
} | ||||||
|
||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -264,7 +264,7 @@ public enum FlowMode { | |
MIN_BASE_QUALITY_SCORE_SHORT_NAME, "0", | ||
FILTER_ALLELES, "true", | ||
FILTER_ALLELES_SOR_THRESHOLD, "3", | ||
FLOW_ASSEMBLY_COLLAPSE_HMER_SIZE_LONG_NAME, "12", | ||
FLOW_ASSEMBLY_COLLAPSE_HMER_SIZE_LONG_NAME, String.valueOf(AssemblyBasedCallerUtils.DETERMINE_COLLAPSE_THRESHOLD), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this change the default for STANDARD/ADVANCED flow mode for older samples that don't have the mc tag? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should not, if there is no mc - then it will be 12 |
||
OVERRIDE_FRAGMENT_SOFTCLIP_CHECK_LONG_NAME, "true", | ||
FlowBasedAlignmentArgumentCollection.FLOW_LIKELIHOOD_PARALLEL_THREADS_LONG_NAME, "2", | ||
FlowBasedAlignmentArgumentCollection.FLOW_LIKELIHOOD_OPTIMIZED_COMP_LONG_NAME, "true", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package org.broadinstitute.hellbender.utils.read; | ||
import htsjdk.samtools.SAMFileHeader; | ||
import htsjdk.samtools.SAMReadGroupRecord; | ||
import htsjdk.samtools.SamReader; | ||
import htsjdk.samtools.SamReaderFactory; | ||
import org.broadinstitute.hellbender.GATKBaseTest; | ||
import org.testng.annotations.Test; | ||
|
||
import java.io.File; | ||
import java.nio.file.FileSystems; | ||
import java.nio.file.Path; | ||
|
||
|
||
public class FlowBasedReadUtilsUnitTest extends GATKBaseTest{ | ||
@Test | ||
void testReadGroupParsing() throws Exception{ | ||
final String testResourceDir = publicTestDir + "org/broadinstitute/hellbender/utils/read/flow/reads/"; | ||
final String inputDir = testResourceDir + "/input/"; | ||
|
||
final Path inputFile = FileSystems.getDefault().getPath(inputDir, "sample_mc.bam"); | ||
final SamReader reader = SamReaderFactory.makeDefault().open(new File(inputFile.toString())); | ||
SAMFileHeader header = reader.getFileHeader(); | ||
SAMReadGroupRecord rg1 = header.getReadGroup("UGAv3-72"); | ||
FlowBasedReadUtils.ReadGroupInfo frg1 = new FlowBasedReadUtils.ReadGroupInfo(rg1); | ||
assert(frg1.maxClass==12); | ||
assert(frg1.flowOrder.startsWith("TGCA")); | ||
SAMReadGroupRecord rg2 = header.getReadGroup("UGAv3-73"); | ||
FlowBasedReadUtils.ReadGroupInfo frg2 = new FlowBasedReadUtils.ReadGroupInfo(rg2); | ||
assert(frg2.maxClass==20); | ||
assert(frg2.flowOrder.startsWith("TGCA")); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3879,7 +3879,7 @@ chr9 81153765 . G <NON_REF> . . END=81153766 GT:DP:GQ:MIN_DP:PL 0/0:27:69:25:0,6 | |
chr9 81153767 . A <NON_REF> . . END=81153783 GT:DP:GQ:MIN_DP:PL 0/0:25:72:24:0,72,802 | ||
chr9 81153784 . A <NON_REF> . . END=81153785 GT:DP:GQ:MIN_DP:PL 0/0:28:22:27:0,22,996 | ||
chr9 81153786 . G <NON_REF> . . END=81153800 GT:DP:GQ:MIN_DP:PL 0/0:28:81:28:0,81,1175 | ||
chr9 81153801 . C CA,<NON_REF> 0 . ASSEMBLED_HAPS=10;AS_RAW_BaseQRankSum=|0.0,1|NaN;AS_RAW_MQ=90000.00|10800.00|0.00;AS_RAW_MQRankSum=|0.0,1|NaN;AS_RAW_ReadPosRankSum=|0.9,1|NaN;AS_SB_TABLE=8,17|3,0|0,0;BaseQRankSum=0.000;DP=29;ExcessHet=0.0000;FILTERED_HAPS=6;HAPCOMP=4,0;HAPDOM=0.500,0.00;HEC=55,3,2,1,0,0;MLEAC=0,0;MLEAF=0.00,0.00;MQRankSum=0.000;RAW_MQandDP=104400,29;ReadPosRankSum=0.929;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PL:SB 0/0:25,3,0:28:18:0,18,58,75,692,115:8,17,3,0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The change in HAPCOMP annotations is expected? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I fixed a small bug in assemblycomplexity annotation that caused a different result between two different java versions, it subtly changed the output, but it should be fine |
||
chr9 81153801 . C CA,<NON_REF> 0 . ASSEMBLED_HAPS=10;AS_RAW_BaseQRankSum=|0.0,1|NaN;AS_RAW_MQ=90000.00|10800.00|0.00;AS_RAW_MQRankSum=|0.0,1|NaN;AS_RAW_ReadPosRankSum=|0.9,1|NaN;AS_SB_TABLE=8,17|3,0|0,0;BaseQRankSum=0.000;DP=29;ExcessHet=0.0000;FILTERED_HAPS=6;HAPCOMP=0,0;HAPDOM=0.500,0.00;HEC=55,3,2,1,0,0;MLEAC=0,0;MLEAF=0.00,0.00;MQRankSum=0.000;RAW_MQandDP=104400,29;ReadPosRankSum=0.929;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PL:SB 0/0:25,3,0:28:18:0,18,58,75,692,115:8,17,3,0 | ||
chr9 81153802 . T <NON_REF> . . END=81153833 GT:DP:GQ:MIN_DP:PL 0/0:38:99:34:0,99,1135 | ||
chr9 81153834 . C <NON_REF> . . END=81153834 GT:DP:GQ:MIN_DP:PL 0/0:39:23:39:0,23,1475 | ||
chr9 81153835 . A <NON_REF> . . END=81153835 GT:DP:GQ:MIN_DP:PL 0/0:39:83:39:0,83,1598 | ||
|
@@ -4206,7 +4206,7 @@ chr9 81156182 . A <NON_REF> . . END=81156183 GT:DP:GQ:MIN_DP:PL 0/0:16:28:16:0,2 | |
chr9 81156184 . A <NON_REF> . . END=81156185 GT:DP:GQ:MIN_DP:PL 0/0:15:13:13:0,13,144 | ||
chr9 81156186 . A <NON_REF> . . END=81156190 GT:DP:GQ:MIN_DP:PL 0/0:16:42:16:0,42,630 | ||
chr9 81156191 . T <NON_REF> . . END=81156194 GT:DP:GQ:MIN_DP:PL 0/0:15:36:15:0,36,540 | ||
chr9 81156195 . A C,<NON_REF> 103.06 . ASSEMBLED_HAPS=14;AS_RAW_BaseQRankSum=||;AS_RAW_MQ=0.00|50400.00|0.00;AS_RAW_MQRankSum=||;AS_RAW_ReadPosRankSum=||;AS_SB_TABLE=0,0|3,11|0,0;DP=15;ExcessHet=0.0000;FILTERED_HAPS=6;HAPCOMP=2,0;HAPDOM=0.250,0.00;HEC=10,6,5,2,1,1,1,0;MLEAC=2,0;MLEAF=1.00,0.00;RAW_MQandDP=54000,15 GT:AD:DP:GQ:PL:SB 1/1:0,14,0:14:42:117,42,0,595,42,117:0,0,3,11 | ||
chr9 81156195 . A C,<NON_REF> 103.06 . ASSEMBLED_HAPS=14;AS_RAW_BaseQRankSum=||;AS_RAW_MQ=0.00|50400.00|0.00;AS_RAW_MQRankSum=||;AS_RAW_ReadPosRankSum=||;AS_SB_TABLE=0,0|3,11|0,0;DP=15;ExcessHet=0.0000;FILTERED_HAPS=6;HAPCOMP=0,0;HAPDOM=0.250,0.00;HEC=10,6,5,2,1,1,1,0;MLEAC=2,0;MLEAF=1.00,0.00;RAW_MQandDP=54000,15 GT:AD:DP:GQ:PL:SB 1/1:0,14,0:14:42:117,42,0,595,42,117:0,0,3,11 | ||
chr9 81156196 . A <NON_REF> . . END=81156196 GT:DP:GQ:MIN_DP:PL 0/0:15:30:15:0,30,450 | ||
chr9 81156197 . T <NON_REF> . . END=81156197 GT:DP:GQ:MIN_DP:PL 0/0:15:10:15:0,10,592 | ||
chr9 81156198 . C <NON_REF> . . END=81156206 GT:DP:GQ:MIN_DP:PL 0/0:15:30:11:0,30,450 | ||
|
@@ -4408,7 +4408,7 @@ chr9 81158617 . A <NON_REF> . . END=81158617 GT:DP:GQ:MIN_DP:PL 0/0:11:9:11:0,9, | |
chr9 81158618 . T <NON_REF> . . END=81158618 GT:DP:GQ:MIN_DP:PL 0/0:10:10:10:0,10,383 | ||
chr9 81158619 . G <NON_REF> . . END=81158621 GT:DP:GQ:MIN_DP:PL 0/0:9:27:9:0,27,377 | ||
chr9 81158622 . C <NON_REF> . . END=81158647 GT:DP:GQ:MIN_DP:PL 0/0:12:30:10:0,30,415 | ||
chr9 81158648 . G GA,<NON_REF> 0.08 . ASSEMBLED_HAPS=41;AS_RAW_BaseQRankSum=|0.0,1|NaN;AS_RAW_MQ=32400.00|7200.00|0.00;AS_RAW_MQRankSum=|0.0,1|NaN;AS_RAW_ReadPosRankSum=|-0.5,1|NaN;AS_SB_TABLE=5,4|2,0|0,0;BaseQRankSum=0.000;DP=13;ExcessHet=0.0000;FILTERED_HAPS=28;HAPCOMP=2,0;HAPDOM=0.500,0.00;HEC=11,7,6,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0;MLEAC=0,0;MLEAF=0.00,0.00;MQRankSum=0.000;RAW_MQandDP=46800,13;ReadPosRankSum=-0.447;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PL:SB 0/1:9,2,0:11:4:4,0,40,30,186,70:5,4,2,0 | ||
chr9 81158648 . G GA,<NON_REF> 0.08 . ASSEMBLED_HAPS=41;AS_RAW_BaseQRankSum=|0.0,1|NaN;AS_RAW_MQ=32400.00|7200.00|0.00;AS_RAW_MQRankSum=|0.0,1|NaN;AS_RAW_ReadPosRankSum=|-0.5,1|NaN;AS_SB_TABLE=5,4|2,0|0,0;BaseQRankSum=0.000;DP=13;ExcessHet=0.0000;FILTERED_HAPS=28;HAPCOMP=1,0;HAPDOM=0.500,0.00;HEC=11,7,6,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0;MLEAC=0,0;MLEAF=0.00,0.00;MQRankSum=0.000;RAW_MQandDP=46800,13;ReadPosRankSum=-0.447;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PL:SB 0/1:9,2,0:11:4:4,0,40,30,186,70:5,4,2,0 | ||
chr9 81158649 . A <NON_REF> . . END=81158655 GT:DP:GQ:MIN_DP:PL 0/0:11:33:11:0,33,368 | ||
chr9 81158656 . A <NON_REF> . . END=81158656 GT:DP:GQ:MIN_DP:PL 0/0:12:21:12:0,21,461 | ||
chr9 81158657 . C <NON_REF> . . END=81158670 GT:DP:GQ:MIN_DP:PL 0/0:13:31:12:0,31,495 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed