Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Genotype concordance workflows #414

Merged
merged 7 commits into from
Sep 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions inputs/templates/test/JoinRawCalls/JoinRawCalls.json.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{# Test inputs for the JoinRawCalls workflow; values are filled in from the `dockers`, `test_batch`, and `reference_resources` template variables. -#}
{
  "JoinRawCalls.gatk_docker": {{ dockers.gatk_docker | tojson }},
  "JoinRawCalls.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }},
  "JoinRawCalls.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }},

  "JoinRawCalls.clustered_depth_vcfs": [{{ test_batch.merged_depth_vcf | tojson }}],
  "JoinRawCalls.clustered_manta_vcfs": [{{ test_batch.merged_manta_vcf | tojson }}],
  "JoinRawCalls.clustered_wham_vcfs": [{{ test_batch.merged_wham_vcf | tojson }}],
  "JoinRawCalls.clustered_melt_vcfs": [{{ test_batch.merged_melt_vcf | tojson }}],

  "JoinRawCalls.ploidy_table": {{ test_batch.ploidy_table | tojson }},

  "JoinRawCalls.contig_list": {{ reference_resources.primary_contigs_list | tojson }},
  "JoinRawCalls.reference_fasta": {{ reference_resources.reference_fasta | tojson }},
  "JoinRawCalls.reference_fasta_fai": {{ reference_resources.reference_index | tojson }},
  "JoinRawCalls.reference_dict": {{ reference_resources.reference_dict | tojson }},

  "JoinRawCalls.cohort": {{ test_batch.name | tojson }}
}
22 changes: 22 additions & 0 deletions inputs/templates/test/SVConcordance/SVConcordance.json.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{# Test inputs for the SVConcordance workflow; values are filled in from the `dockers`, `test_batch`, and `reference_resources` template variables. -#}
{# NOTE(review): the run_* switches below are emitted as JSON booleans rather than the strings "true"/"false"; this assumes the workflow declares them as Boolean inputs - confirm against the WDL. -#}
{
  "SVConcordance.gatk_docker": {{ dockers.gatk_docker_concordance | tojson }},
  "SVConcordance.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }},
  "SVConcordance.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }},
  "SVConcordance.sv_utils_docker": {{ dockers.sv_utils_docker | tojson }},

  "SVConcordance.eval_vcf": {{ test_batch.clean_vcf | tojson }},
  "SVConcordance.truth_vcf": {{ test_batch.joined_raw_calls_vcf | tojson }},

  "SVConcordance.ploidy_table": {{ test_batch.ploidy_table | tojson }},
  "SVConcordance.cohort": {{ test_batch.name | tojson }},

  "SVConcordance.run_svutils_truth_vcf": false,
  "SVConcordance.run_formatter_truth_vcf": false,

  "SVConcordance.run_svutils_eval_vcf": true,
  "SVConcordance.run_formatter_eval_vcf": true,
  "SVConcordance.formatter_eval_args": "--only-add-cn-fields --replace-ev-format --filter-unsupported-types",

  "SVConcordance.contig_list": {{ reference_resources.primary_contigs_list | tojson }},
  "SVConcordance.reference_dict": {{ reference_resources.reference_dict | tojson }}
}
19 changes: 10 additions & 9 deletions inputs/values/dockers.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"condense_counts_docker": "us.gcr.io/broad-dsde-methods/tsharpe/gatk:4.2.6.1-36-g88b0578",
"gatk_docker": "us.gcr.io/broad-dsde-methods/tsharpe/gatk:4.2.6.1-36-g88b0578",
"gatk_docker_pesr_override": "us.gcr.io/broad-dsde-methods/tsharpe/gatk:4.2.6.1-36-g88b0578",
"gatk_docker_concordance": "us.gcr.io/broad-dsde-methods/markw/gatk:mw-sv-concordance-937c81",
"genomes_in_the_cloud_docker": "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135",
"linux_docker": "marketplace.gcr.io/google/ubuntu1804",
"manta_docker": "us.gcr.io/broad-dsde-methods/manta:8645aa",
Expand All @@ -12,12 +13,12 @@
"samtools_cloud_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/samtools-cloud:2022-06-10-v0.23-beta-9c6fbf56",
"sv_base_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base:2022-06-10-v0.23-beta-9c6fbf56",
"sv_base_mini_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base-mini:2022-06-10-v0.23-beta-9c6fbf56",
"sv_pipeline_base_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2022-09-21-v0.26.1-beta-c5e6abc8",
"sv_pipeline_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2022-09-21-v0.26.1-beta-c5e6abc8",
"sv_pipeline_hail_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2022-09-21-v0.26.1-beta-c5e6abc8",
"sv_pipeline_updates_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2022-09-21-v0.26.1-beta-c5e6abc8",
"sv_pipeline_qc_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2022-09-21-v0.26.1-beta-c5e6abc8",
"sv_pipeline_rdtest_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2022-09-21-v0.26.1-beta-c5e6abc8",
"sv_pipeline_base_docker": "us.gcr.io/broad-dsde-methods/markw/sv-pipeline:mw-concordance-8223c30",
"sv_pipeline_docker": "us.gcr.io/broad-dsde-methods/markw/sv-pipeline:mw-concordance-8223c30",
"sv_pipeline_hail_docker": "us.gcr.io/broad-dsde-methods/markw/sv-pipeline:mw-concordance-8223c30",
"sv_pipeline_updates_docker": "us.gcr.io/broad-dsde-methods/markw/sv-pipeline:mw-concordance-8223c30",
"sv_pipeline_qc_docker": "us.gcr.io/broad-dsde-methods/markw/sv-pipeline:mw-concordance-8223c30",
"sv_pipeline_rdtest_docker": "us.gcr.io/broad-dsde-methods/markw/sv-pipeline:mw-concordance-8223c30",
"wham_docker": "us.gcr.io/broad-dsde-methods/wham:8645aa",
"igv_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/igv:mw-xz-fixes-2-b1be6a9",
"duphold_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/duphold:mw-xz-fixes-2-b1be6a9",
Expand All @@ -28,8 +29,8 @@
"cnmops-virtual-env": "us.gcr.io/broad-dsde-methods/gatk-sv/cnmops-virtual-env:2022-06-10-v0.23-beta-9c6fbf56",
"sv-pipeline-virtual-env": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline-virtual-env:2022-06-10-v0.23-beta-9c6fbf56",
"samtools-cloud-virtual-env": "us.gcr.io/broad-dsde-methods/gatk-sv/samtools-cloud-virtual-env:2022-06-10-v0.23-beta-9c6fbf56",
"sv-utils-env": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-utils-env:2022-09-09-v0.25.1-beta-cd5ce7e0",
"sv_utils_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-utils:2022-09-21-v0.26.1-beta-c5e6abc8",
"sv-utils-env": "us.gcr.io/broad-dsde-methods/markw/sv-utils-env:mw-concordance-7251507",
"sv_utils_docker": "us.gcr.io/broad-dsde-methods/markw/sv-utils:mw-concordance-7251507",
"gq_recalibrator_docker": "us.gcr.io/broad-dsde-methods/tbrookin/gatk:0a7e1d86f",
"str": "us.gcr.io/broad-dsde-methods/gatk-sv/str:2022-09-15-v0.25.1-beta-b53e58af"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose this PR is not related to EH; if so, I wonder how build_docker.py ended up incorrectly updating this, while it worked correctly when invoked by the bot.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Had to do with the way I invoked build_docker.py - my master branch was out of date on the machine where I did the build but my working branch was rebased on the latest master, which contained your recent changes to some str code. I used "master" as the current commit - so it saw the str updates relative to the outdated master and rebuilt the docker.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That explains it, thanks!

"str": "us.gcr.io/broad-dsde-methods/markw/str:mw-concordance-7251507"
}
4 changes: 3 additions & 1 deletion inputs/values/ref_panel_1kg.json
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,7 @@
"hgdp_1kgp_ped": "gs://gatk-sv-resources-public/hg38/v0/sv-resources/resources/v1/HGDP_1KGP.ped",
"cohort_depth_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GATKSVPipelinePhase1/GATKSVPipelinePhase1/acce2c71-7458-4205-ae13-624f6efc9956/call-FilterBatch/FilterBatch/184defa3-e61c-4757-9962-f685f6d0d204/call-FilterBatchSamples/FilterBatchSamples/b308c32e-d171-4d8d-aeaf-b561c55b06b4/call-ExcludeOutliers/shard-4/cacheCopy/ref_panel_1kg.depth.outliers_removed.vcf.gz",
"cohort_pesr_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GATKSVPipelinePhase1/GATKSVPipelinePhase1/acce2c71-7458-4205-ae13-624f6efc9956/call-FilterBatch/FilterBatch/184defa3-e61c-4757-9962-f685f6d0d204/call-FilterBatchSamples/FilterBatchSamples/b308c32e-d171-4d8d-aeaf-b561c55b06b4/call-MergePesrVcfs/cacheCopy/ref_panel_1kg.filtered_pesr_merged.vcf.gz",
"concordance_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/SVConcordance/1509e8d1-5d8a-4455-9cd6-152f4f1af313/ref_panel_1kg.concordance.vcf.gz",
"contig_ploidy_model_tar": "gs://gatk-sv-resources-public/hg38/v0/sv-resources/ref-panel/1KG/v2/gcnv/ref_panel_1kg_v2-contig-ploidy-model.tar.gz",
"counts": [
"gs://gatk-sv-ref-panel-1kg/outputs/tws_earlyBAF/counts/HG00096.counts.tsv.gz",
Expand Down Expand Up @@ -1585,6 +1586,7 @@
"genotype_pesr_pesr_sepcutoff": "gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GenotypeBatch/GenotypeBatch/ad17f522-0950-4f0a-9148-a13f689082ed/call-GenotypePESRPart1/GenotypePESRPart1/40ec6d76-dd1c-432d-bfab-bc4426d0b1ec/call-TrainRDGenotyping/TrainRDGenotyping/e5540a96-9072-4719-bcfb-afccdfec15c6/call-UpdateCutoff/cacheCopy/ref_panel_1kg.pesr.pesr_sepcutoff.txt",
"genotyped_depth_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GenotypeBatch/GenotypeBatch/ad17f522-0950-4f0a-9148-a13f689082ed/call-GenotypeDepthPart2/GenotypeDepthPart2/0aafd752-e606-4196-86ac-41c1c3ce1eb2/call-ConcatGenotypedVcfs/cacheCopy/ref_panel_1kg.depth.vcf.gz",
"genotyped_pesr_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GenotypeBatch/GenotypeBatch/ad17f522-0950-4f0a-9148-a13f689082ed/call-GenotypePESRPart2/GenotypePESRPart2/ce1f4075-1a3e-44b5-9cfe-bfb701327616/call-ConcatGenotypedVcfs/cacheCopy/ref_panel_1kg.pesr.vcf.gz",
"joined_raw_calls_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/JoinRawCalls/a613865b-f7ec-4edb-8a2e-21508335249e/ref_panel_1kg.join_raw_calls.vcf.gz",
"manta_vcfs": [
"gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GatherSampleEvidenceBatch/GatherSampleEvidenceBatch/bb5e70f1-7186-4e29-96f8-dcb9ee000642/call-GatherSampleEvidence/shard-0/GatherSampleEvidence/bed8ee84-625c-4d39-99d4-20f9c0462ce6/call-Manta/Manta/9729df6e-82b5-40af-b2f2-028de89f2a7d/call-RunManta/cacheCopy/HG00096.manta.vcf.gz",
"gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GatherSampleEvidenceBatch/GatherSampleEvidenceBatch/bb5e70f1-7186-4e29-96f8-dcb9ee000642/call-GatherSampleEvidence/shard-1/GatherSampleEvidence/b623755c-af49-4f7c-98d9-f03d95ab9397/call-Manta/Manta/aaed987f-50bd-48f6-a4c1-181512567c1a/call-RunManta/cacheCopy/HG00129.manta.vcf.gz",
Expand Down Expand Up @@ -2239,7 +2241,7 @@
"name": "ref_panel_1kg",
"outlier_cutoff_table": "gs://gatk-sv-resources-public/hg38/v0/sv-resources/ref-panel/1KG/v1/module03_outlier_cutoff_table.tsv",
"ped_file": "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/ref-panel/1KG/v1/ped/1kg_ref_panel_v1.ped",

"ploidy_table": "gs://gatk-sv-ref-panel-1kg/outputs/ClusterBatch/mw-concordance/ref_panel_1kg.ploidy.FEMALE_chrY_1.tsv",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should open a ticket to track the changes needed to repopulate this JSON with create_test_batch.py, including either outputting this file from the Batch pipeline or keeping this value along with the new batch's metadata. I think some other recent changes might need to sync up with the create_test_batch.py script too (e.g., handling tarballs of std vcfs vs. the file lists used in the single-sample pipeline), so it would be good to keep track of those.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

"qc_definitions": "gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/ref_panel_1kg.qc_definitions.tsv",
"qc_file": "gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/sv_qc.ref_panel_1kg.tsv",
"raw_sr_background_fail_file": "gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GenotypeBatch/GenotypeBatch/ad17f522-0950-4f0a-9148-a13f689082ed/call-GenotypePESRPart2/GenotypePESRPart2/ce1f4075-1a3e-44b5-9cfe-bfb701327616/call-TripleStreamCatFail/cacheCopy/ref_panel_1kg.genotype_SR_part2_background_fail.txt",
Expand Down
Loading