-
Notifications
You must be signed in to change notification settings - Fork 597
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding test to match WARP tests edge case (#8928)
- Loading branch information
1 parent
3d99f22
commit 64dc4b3
Showing
2 changed files
with
136 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
106 changes: 106 additions & 0 deletions
106
.../org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF/reblockEdgeCase.vcf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
##fileformat=VCFv4.2 | ||
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location"> | ||
##FILTER=<ID=LowQual,Description="Low quality"> | ||
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block"> | ||
##FORMAT=<ID=PGT,Number=1,Type=String,Description="Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"> | ||
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group"> | ||
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> | ||
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias."> | ||
##GATKCommandLine.HaplotypeCaller=<ID=HaplotypeCaller,Version=3.5-0-g36282e4,Date="Wed Oct 20 15:43:16 GMT 2021",Epoch=1634744596639,CommandLineOptions="analysis_type=HaplotypeCaller input_file=[local.sharded.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[OverclippedRead] disable_read_filter=[] intervals=[/cromwell_root/broad-gotc-staging-cromwell-execution/WholeGenomeGermlineSingleSample/08b145f8-14d9-4a70-973d-b082a55d9a47/call-BamToGvcf/VariantCalling/8e5f3c30-f5a0-4114-b095-f1da9207e892/call-ScatterIntervalList/cacheCopy/glob-cb4648beeaff920acb03de7603c06f98/31scattered.interval_list] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/cromwell_root/gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=500 baq=OFF baqGapOpenPenalty=40.0 refactor_NDN_cigar_string=false fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 static_quantized_quals=null round_down_quantized=false disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false no_cmdline_in_header=false sites_only=false never_trim_vcf_format_field=false bcf=false bam_compression=null simplifyBAM=false disable_bam_indexing=false generate_md5=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=LINEAR variant_index_parameter=128000 reference_window_stop=0 logging_level=INFO log_to_file=null help=false version=false likelihoodCalculationEngine=PairHMM heterogeneousKmerSizeResolution=COMBO_MIN dbsnp=(RodBinding name= source=UNBOUND) dontTrimActiveRegions=false maxDiscARExtension=25 maxGGAARExtension=300 paddingAroundIndels=150 paddingAroundSNPs=20 comp=[] annotation=[StrandBiasBySample] excludeAnnotation=[ChromosomeCounts, FisherStrand, StrandOddsRatio, QualByDepth] group=[Standard, StandardHCAnnotation] debug=false useFilteredReadsForAnnotations=false emitRefConfidence=GVCF bamOutput=null bamWriterType=CALLED_HAPLOTYPES disableOptimizations=false annotateNDA=false heterozygosity=0.001 indel_heterozygosity=1.25E-4 standard_min_confidence_threshold_for_calling=-0.0 standard_min_confidence_threshold_for_emitting=-0.0 max_alternate_alleles=3 input_prior=[] sample_ploidy=2 genotyping_mode=DISCOVERY alleles=(RodBinding name= source=UNBOUND) contamination_fraction_to_filter=0.0170512 contamination_fraction_per_sample_file=null p_nonref_model=null exactcallslog=null output_mode=EMIT_VARIANTS_ONLY allSitePLs=true gcpHMM=10 pair_hmm_implementation=VECTOR_LOGLESS_CACHING pair_hmm_sub_implementation=ENABLE_ALL always_load_vector_logless_PairHMM_lib=false phredScaledGlobalReadMismappingRate=45 noFpga=false sample_name=null kmerSize=[10, 25] dontIncreaseKmerSizesForCycles=false allowNonUniqueKmersInRef=false numPruningSamples=1 recoverDanglingHeads=false doNotRecoverDanglingBranches=false minDanglingBranchLength=4 consensus=false maxNumHaplotypesInPopulation=128 errorCorrectKmers=false minPruning=2 debugGraphTransformations=false allowCyclesInKmerGraphToGeneratePaths=false graphOutput=null kmerLengthForReadErrorCorrection=25 minObservationsForKmerToBeSolid=20 GVCFGQBands=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 70, 80, 90, 99] indelSizeToEliminateInRefModel=10 min_base_quality_score=10 includeUmappedReads=false useAllelesTrigger=false doNotRunPhysicalPhasing=false keepRG=null justDetermineActiveRegions=false dontGenotype=false dontUseSoftClippedBases=false captureAssemblyFailureBAM=false errorCorrectReads=false pcr_indel_model=CONSERVATIVE maxReadsInRegionPerSample=10000 minReadsPerAlignmentStart=10 mergeVariantsViaLD=false activityProfileOut=null activeRegionOut=null activeRegionIn=null activeRegionExtension=null forceActive=false activeRegionMaxSize=null bandPassSigma=null maxProbPropagationDistance=50 activeProbabilityThreshold=0.002 filter_is_too_short_value=30 do_not_require_softclips_both_ends=false min_mapping_quality_score=20 filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> | ||
##GATKCommandLine=<ID=SelectVariants,CommandLine="SelectVariants --output reblockEdgeCase.vcf --variant gs://broad-gotc-test-storage/reblock_gvcf/wgs/scientific/input/G94982.NA12878/NA12878.g.vcf.gz --intervals chr20:13675308-13675330 --apply-jexl-filters-first false --invertSelect false --exclude-non-variants false --exclude-filtered false --preserve-alleles false --remove-unused-alternates false --restrict-alleles-to ALL --keep-original-ac false --keep-original-dp false --mendelian-violation false --invert-mendelian-violation false --mendelian-violation-qual-threshold 0.0 --select-random-fraction 1.0 --remove-fraction-genotypes 0.0 --ignore-non-ref-in-types false --fully-decode false --max-indel-size 2147483647 --min-indel-size 0 --max-filtered-genotypes 2147483647 --min-filtered-genotypes 0 --max-fraction-filtered-genotypes 1.0 --min-fraction-filtered-genotypes 0.0 --max-nocall-number 2147483647 --max-nocall-fraction 1.0 --set-filtered-gt-to-nocall false --allow-nonoverlapping-command-line-samples false --suppress-reference-path false --fail-on-unsorted-genotype false --genomicsdb-max-alternate-alleles 50 --call-genotypes false --genomicsdb-use-bcf-codec false --genomicsdb-shared-posixfs-optimizations false --genomicsdb-use-gcs-hdfs-connector false --interval-set-rule UNION --interval-padding 0 --interval-exclusion-padding 0 --interval-merging-rule ALL --read-validation-stringency SILENT --seconds-between-progress-updates 10.0 --disable-sequence-dictionary-validation false --create-output-bam-index true --create-output-bam-md5 false --create-output-variant-index true --create-output-variant-md5 false --max-variants-per-shard 0 --lenient false --add-output-sam-program-record true --add-output-vcf-command-line true --cloud-prefetch-buffer 40 --cloud-index-prefetch-buffer -1 --disable-bam-index-caching false --sites-only-vcf-output false --help false --version false --showHidden false --verbosity INFO --QUIET false --use-jdk-deflater false --use-jdk-inflater false --gcs-max-retries 20 --gcs-project-for-requester-pays --disable-tool-default-read-filters false",Version="4.6.0.0-7-g3d99f22-SNAPSHOT",Date="July 26, 2024 at 1:07:32 PM EDT"> | ||
##GVCFBlock0-1=minGQ=0(inclusive),maxGQ=1(exclusive) | ||
##GVCFBlock1-2=minGQ=1(inclusive),maxGQ=2(exclusive) | ||
##GVCFBlock10-11=minGQ=10(inclusive),maxGQ=11(exclusive) | ||
##GVCFBlock11-12=minGQ=11(inclusive),maxGQ=12(exclusive) | ||
##GVCFBlock12-13=minGQ=12(inclusive),maxGQ=13(exclusive) | ||
##GVCFBlock13-14=minGQ=13(inclusive),maxGQ=14(exclusive) | ||
##GVCFBlock14-15=minGQ=14(inclusive),maxGQ=15(exclusive) | ||
##GVCFBlock15-16=minGQ=15(inclusive),maxGQ=16(exclusive) | ||
##GVCFBlock16-17=minGQ=16(inclusive),maxGQ=17(exclusive) | ||
##GVCFBlock17-18=minGQ=17(inclusive),maxGQ=18(exclusive) | ||
##GVCFBlock18-19=minGQ=18(inclusive),maxGQ=19(exclusive) | ||
##GVCFBlock19-20=minGQ=19(inclusive),maxGQ=20(exclusive) | ||
##GVCFBlock2-3=minGQ=2(inclusive),maxGQ=3(exclusive) | ||
##GVCFBlock20-21=minGQ=20(inclusive),maxGQ=21(exclusive) | ||
##GVCFBlock21-22=minGQ=21(inclusive),maxGQ=22(exclusive) | ||
##GVCFBlock22-23=minGQ=22(inclusive),maxGQ=23(exclusive) | ||
##GVCFBlock23-24=minGQ=23(inclusive),maxGQ=24(exclusive) | ||
##GVCFBlock24-25=minGQ=24(inclusive),maxGQ=25(exclusive) | ||
##GVCFBlock25-26=minGQ=25(inclusive),maxGQ=26(exclusive) | ||
##GVCFBlock26-27=minGQ=26(inclusive),maxGQ=27(exclusive) | ||
##GVCFBlock27-28=minGQ=27(inclusive),maxGQ=28(exclusive) | ||
##GVCFBlock28-29=minGQ=28(inclusive),maxGQ=29(exclusive) | ||
##GVCFBlock29-30=minGQ=29(inclusive),maxGQ=30(exclusive) | ||
##GVCFBlock3-4=minGQ=3(inclusive),maxGQ=4(exclusive) | ||
##GVCFBlock30-31=minGQ=30(inclusive),maxGQ=31(exclusive) | ||
##GVCFBlock31-32=minGQ=31(inclusive),maxGQ=32(exclusive) | ||
##GVCFBlock32-33=minGQ=32(inclusive),maxGQ=33(exclusive) | ||
##GVCFBlock33-34=minGQ=33(inclusive),maxGQ=34(exclusive) | ||
##GVCFBlock34-35=minGQ=34(inclusive),maxGQ=35(exclusive) | ||
##GVCFBlock35-36=minGQ=35(inclusive),maxGQ=36(exclusive) | ||
##GVCFBlock36-37=minGQ=36(inclusive),maxGQ=37(exclusive) | ||
##GVCFBlock37-38=minGQ=37(inclusive),maxGQ=38(exclusive) | ||
##GVCFBlock38-39=minGQ=38(inclusive),maxGQ=39(exclusive) | ||
##GVCFBlock39-40=minGQ=39(inclusive),maxGQ=40(exclusive) | ||
##GVCFBlock4-5=minGQ=4(inclusive),maxGQ=5(exclusive) | ||
##GVCFBlock40-41=minGQ=40(inclusive),maxGQ=41(exclusive) | ||
##GVCFBlock41-42=minGQ=41(inclusive),maxGQ=42(exclusive) | ||
##GVCFBlock42-43=minGQ=42(inclusive),maxGQ=43(exclusive) | ||
##GVCFBlock43-44=minGQ=43(inclusive),maxGQ=44(exclusive) | ||
##GVCFBlock44-45=minGQ=44(inclusive),maxGQ=45(exclusive) | ||
##GVCFBlock45-46=minGQ=45(inclusive),maxGQ=46(exclusive) | ||
##GVCFBlock46-47=minGQ=46(inclusive),maxGQ=47(exclusive) | ||
##GVCFBlock47-48=minGQ=47(inclusive),maxGQ=48(exclusive) | ||
##GVCFBlock48-49=minGQ=48(inclusive),maxGQ=49(exclusive) | ||
##GVCFBlock49-50=minGQ=49(inclusive),maxGQ=50(exclusive) | ||
##GVCFBlock5-6=minGQ=5(inclusive),maxGQ=6(exclusive) | ||
##GVCFBlock50-51=minGQ=50(inclusive),maxGQ=51(exclusive) | ||
##GVCFBlock51-52=minGQ=51(inclusive),maxGQ=52(exclusive) | ||
##GVCFBlock52-53=minGQ=52(inclusive),maxGQ=53(exclusive) | ||
##GVCFBlock53-54=minGQ=53(inclusive),maxGQ=54(exclusive) | ||
##GVCFBlock54-55=minGQ=54(inclusive),maxGQ=55(exclusive) | ||
##GVCFBlock55-56=minGQ=55(inclusive),maxGQ=56(exclusive) | ||
##GVCFBlock56-57=minGQ=56(inclusive),maxGQ=57(exclusive) | ||
##GVCFBlock57-58=minGQ=57(inclusive),maxGQ=58(exclusive) | ||
##GVCFBlock58-59=minGQ=58(inclusive),maxGQ=59(exclusive) | ||
##GVCFBlock59-60=minGQ=59(inclusive),maxGQ=60(exclusive) | ||
##GVCFBlock6-7=minGQ=6(inclusive),maxGQ=7(exclusive) | ||
##GVCFBlock60-70=minGQ=60(inclusive),maxGQ=70(exclusive) | ||
##GVCFBlock7-8=minGQ=7(inclusive),maxGQ=8(exclusive) | ||
##GVCFBlock70-80=minGQ=70(inclusive),maxGQ=80(exclusive) | ||
##GVCFBlock8-9=minGQ=8(inclusive),maxGQ=9(exclusive) | ||
##GVCFBlock80-90=minGQ=80(inclusive),maxGQ=90(exclusive) | ||
##GVCFBlock9-10=minGQ=9(inclusive),maxGQ=10(exclusive) | ||
##GVCFBlock90-99=minGQ=90(inclusive),maxGQ=99(exclusive) | ||
##GVCFBlock99-2147483647=minGQ=99(inclusive),maxGQ=2147483647(exclusive) | ||
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> | ||
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities"> | ||
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases"> | ||
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> | ||
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?"> | ||
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval"> | ||
##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity"> | ||
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> | ||
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"> | ||
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> | ||
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> | ||
##INFO=<ID=RAW_MQ,Number=1,Type=Float,Description="Raw data for RMS Mapping Quality"> | ||
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"> | ||
##contig=<ID=chr20,length=64444167> | ||
##source=SelectVariants | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 | ||
chr20 13675306 . AGAAGGAAGGAAGGAAGGAAGGAAG A,AGAAG,<NON_REF> 0 . DP=26;ExcessHet=3.0103;MLEAC=0,0,0;MLEAF=0.00,0.00,0.00;RAW_MQ=70609.00 GT:AD:DP:GQ:PL:SB 0/0:8,0,0,0:8:24:0,44,673,41,397,607,24,380,377,360:8,0,0,0 | ||
chr20 13675308 . AAGGAAGGAAGGAAG A,AGAAG,<NON_REF> 0 . DP=26;ExcessHet=3.0103;MLEAC=0,0,0;MLEAF=0.00,0.00,0.00;RAW_MQ=70609.00 GT:AD:DP:GQ:PL:SB 0/0:0,0,0,0:0:0:0,20,313,18,39,249,0,21,18,0:0,0,0,0 | ||
chr20 13675322 . G A,<NON_REF> 0 . DP=26;ExcessHet=3.0103;MLEAC=0,0;MLEAF=0.00,0.00;RAW_MQ=70609.00 GT:AD:DP:GQ:PL:SB 0/0:6,0,0:6:19:0,19,252,19,252,252:4,2,0,0 | ||
chr20 13675323 . G <NON_REF> . . END=13675325 GT:DP:GQ:MIN_DP:PL 0/0:26:0:25:0,0,25 | ||
chr20 13675326 . G A,<NON_REF> 0.03 . DP=26;ExcessHet=3.0103;MLEAC=0,0;MLEAF=0.00,0.00;RAW_MQ=70609.00 GT:AD:DP:GQ:PGT:PID:PL:SB 0/1:0,0,0:0:0:0|1:13675295_AAG_A:1,0,0,2,2,3:0,0,0,0 | ||
chr20 13675327 . G <NON_REF> . . END=13675329 GT:DP:GQ:MIN_DP:PL 0/0:26:0:24:0,0,49 | ||
chr20 13675330 . G A,<NON_REF> 0 . DP=26;ExcessHet=3.0103;MLEAC=0,0;MLEAF=0.00,0.00;RAW_MQ=70609.00 GT:AD:DP:GQ:PL:SB 0/0:7,0,0:7:22:0,22,316,22,316,316:6,1,0,0 |