-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #201 from Irallia/TEST/benchmarks/dataset_comparis…
…on_igenvar_only [BENCHMARK] dataset comparison igenvar only
- Loading branch information
Showing
10 changed files
with
682 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
configfile: "Repos/iGenVar/test/benchmark/config/caller_comparison_config.yaml" | ||
|
||
include: "workflow/rules/callers.smk" | ||
include: "workflow/rules/eval.smk" | ||
include: "workflow/rules/eval.DUP_as_INS.smk" | ||
include: "workflow/rules/plots.smk" | ||
|
||
##### Target rules ##### | ||
|
||
rule all: | ||
input: | ||
"results/caller_comparison_iGenVar_only/eval/results.all.png", | ||
"results/caller_comparison_iGenVar_only/eval/results.DUP_as_INS.all.png" |
98 changes: 98 additions & 0 deletions
98
test/benchmark/caller_comparison_iGenVar_only/workflow/rules/callers.smk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
sample = config["parameters"]["sample"], | ||
min_var_length = config["parameters"]["min_var_length"], | ||
max_var_length = config["parameters"]["max_var_length"] | ||
|
||
rule run_iGenVar: | ||
output: | ||
vcf = "results/caller_comparison_iGenVar_only/{input_combination}/variants.vcf" | ||
log: | ||
"logs/caller_comparison_iGenVar_only/{input_combination}_output.log" | ||
threads: 2 | ||
run: | ||
if wildcards.input_combination == 'S1': # Illumina Paired End | ||
short_bam = config["short_read_bam"]["s1"] | ||
shell(""" | ||
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} \ | ||
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} --verbose \ | ||
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log} | ||
""") | ||
elif wildcards.input_combination == 'S2': # Illumina Mate Pair | ||
short_bam = config["short_read_bam"]["s2"] | ||
shell(""" | ||
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} \ | ||
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} --verbose \ | ||
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log} | ||
""") | ||
elif wildcards.input_combination == 'S1L1': # Illumina Paired End & MtSinai PacBio | ||
short_bam = config["short_read_bam"]["s1"], | ||
long_bam = config["long_read_bam"]["l1"] | ||
shell(""" | ||
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \ | ||
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} --verbose \ | ||
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log} | ||
""") | ||
elif wildcards.input_combination == 'S2L1': # Illumina Mate Pair & MtSinai PacBio | ||
short_bam = config["short_read_bam"]["s2"], | ||
long_bam = config["long_read_bam"]["l1"] | ||
shell(""" | ||
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \ | ||
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} --verbose \ | ||
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log} | ||
""") | ||
elif wildcards.input_combination == 'S1L2': # Illumina Paired End & PacBio CCS | ||
short_bam = config["short_read_bam"]["s1"], | ||
long_bam = config["long_read_bam"]["l2"] | ||
shell(""" | ||
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \ | ||
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} --verbose \ | ||
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log} | ||
""") | ||
elif wildcards.input_combination == 'S2L2': # Illumina Mate Pair & PacBio CCS | ||
short_bam = config["short_read_bam"]["s2"], | ||
long_bam = config["long_read_bam"]["l2"] | ||
shell(""" | ||
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \ | ||
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} --verbose \ | ||
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log} | ||
""") | ||
elif wildcards.input_combination == 'S1L3': # Illumina Paired End & 10X Genomics | ||
short_bam = config["short_read_bam"]["s1"], | ||
long_bam = config["long_read_bam"]["l3"] | ||
shell(""" | ||
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \ | ||
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} --verbose \ | ||
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log} | ||
""") | ||
elif wildcards.input_combination == 'S2L3': # Illumina Mate Pair & 10X Genomics | ||
short_bam = config["short_read_bam"]["s2"], | ||
long_bam = config["long_read_bam"]["l3"] | ||
shell(""" | ||
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \ | ||
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} --verbose \ | ||
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log} | ||
""") | ||
elif wildcards.input_combination == 'L1': # MtSinai PacBio | ||
long_bam = config["long_read_bam"]["l1"] | ||
shell(""" | ||
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_long_reads {long_bam} \ | ||
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} --verbose \ | ||
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log} | ||
""") | ||
elif wildcards.input_combination == 'L2': # PacBio CCS | ||
long_bam = config["long_read_bam"]["l2"] | ||
shell(""" | ||
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_long_reads {long_bam} \ | ||
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} --verbose \ | ||
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log} | ||
""") | ||
else: # wildcards.input_combination == 'L3': # 10X Genomics | ||
long_bam = config["long_read_bam"]["l3"] | ||
shell(""" | ||
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_long_reads {long_bam} \ | ||
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} --verbose \ | ||
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log} | ||
""") | ||
# Defaults: | ||
# --method cigar_string --method split_read --method read_pairs --method read_depth | ||
# --clustering_methods hierarchical_clustering --refinement_methods no_refinement | ||
# --max_tol_inserted_length 50 --max_overlap 10 --hierarchical_clustering_cutoff 0.5 |
131 changes: 131 additions & 0 deletions
131
test/benchmark/caller_comparison_iGenVar_only/workflow/rules/eval.DUP_as_INS.smk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
rule DUP_as_INS: | ||
input: | ||
vcf = "results/caller_comparison_iGenVar_only/{input_combination}/variants.vcf" | ||
output: | ||
vcf = "results/caller_comparison_iGenVar_only/{input_combination}/variants.DUP_as_INS.vcf" | ||
log: | ||
"logs/caller_comparison_iGenVar_only/DUP_as_INS.{input_combination}.log" | ||
shell: | ||
"sed -e 's/<DUP:TANDEM>/<INS>/g' {input.vcf} | sed -e 's/SVTYPE=DUP/SVTYPE=INS/g' > {output.vcf}" | ||
|
||
rule DUP_as_INS_filter_vcf: | ||
input: | ||
vcf = "results/caller_comparison_iGenVar_only/{input_combination}/variants.DUP_as_INS.vcf" | ||
output: | ||
vcf = "results/caller_comparison_iGenVar_only/{input_combination}/variants.DUP_as_INS.min_qual_{min_qual}.vcf" | ||
log: | ||
"logs/caller_comparison_iGenVar_only/filter_vcf_output.{input_combination}.DUP_as_INS.{min_qual}.log", | ||
conda: | ||
"../../../envs/bcftools.yaml" | ||
shell: | ||
"bcftools view -i 'QUAL>={wildcards.min_qual}.00' {input.vcf} > {output.vcf} 2> {log}" | ||
|
||
rule DUP_as_INS_truvari: | ||
input: | ||
vcf = "results/caller_comparison_iGenVar_only/{input_combination}/variants.DUP_as_INS.min_qual_{min_qual}.vcf.gz", | ||
index = "results/caller_comparison_iGenVar_only/{input_combination}/variants.DUP_as_INS.min_qual_{min_qual}.vcf.gz.tbi" | ||
output: | ||
summary = "results/caller_comparison_iGenVar_only/eval/{input_combination}/DUP_as_INS.min_qual_{min_qual}/summary.txt" | ||
log: | ||
"logs/truvari/truvari_output.{input_combination}.{min_qual}.log" | ||
params: | ||
output_dir = "results/caller_comparison_iGenVar_only/eval/{input_combination}/DUP_as_INS.min_qual_{min_qual}", | ||
truth_set_gz = config["truth_set_renamed_chr"]["gz"], | ||
truth_set_bed = config["truth_set_renamed_chr"]["bed"] | ||
shell: | ||
""" | ||
rm -rf {params.output_dir} && truvari bench -b {params.truth_set_gz} -c {input.vcf} -o {params.output_dir} -p 0 \ | ||
--passonly --includebed {params.truth_set_bed} &>> {log} | ||
""" | ||
# -f data/reference/hs37d5.fa | ||
|
||
rule DUP_as_INS_reformat_truvari_results: | ||
input: | ||
"results/caller_comparison_iGenVar_only/eval/{input_combination}/DUP_as_INS.min_qual_{min_qual}/summary.txt" | ||
output: | ||
"results/caller_comparison_iGenVar_only/eval/{input_combination}/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt" | ||
threads: 1 | ||
shell: | ||
""" | ||
cat {input} | grep '\<precision\>\|\<recall\>' | tr -d ',' |sed 's/^[ \t]*//' | tr -d '\"' | tr -d ' ' \ | ||
| tr ':' '\t' | awk 'OFS=\"\\t\" {{ print \"{wildcards.input_combination}\", \"{wildcards.min_qual}\", $1, $2 }}' \ | ||
> {output} | ||
""" | ||
|
||
rule DUP_as_INS_cat_truvari_results_all: | ||
input: | ||
S1 = expand("results/caller_comparison_iGenVar_only/eval/S1/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt", | ||
min_qual=list(range(config["quality_ranges"]["from"], | ||
config["quality_ranges"]["to"], | ||
config["quality_ranges"]["step"]))), | ||
S2 = expand("results/caller_comparison_iGenVar_only/eval/S2/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt", | ||
min_qual=list(range(config["quality_ranges"]["from"], | ||
config["quality_ranges"]["to"], | ||
config["quality_ranges"]["step"]))), | ||
S1L1 = expand("results/caller_comparison_iGenVar_only/eval/S1L1/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt", | ||
min_qual=list(range(config["quality_ranges"]["from"], | ||
config["quality_ranges"]["to"], | ||
config["quality_ranges"]["step"]))), | ||
S2L1 = expand("results/caller_comparison_iGenVar_only/eval/S2L1/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt", | ||
min_qual=list(range(config["quality_ranges"]["from"], | ||
config["quality_ranges"]["to"], | ||
config["quality_ranges"]["step"]))), | ||
S1L2 = expand("results/caller_comparison_iGenVar_only/eval/S1L2/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt", | ||
min_qual=list(range(config["quality_ranges"]["from"], | ||
config["quality_ranges"]["to"], | ||
config["quality_ranges"]["step"]))), | ||
S2L2 = expand("results/caller_comparison_iGenVar_only/eval/S2L2/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt", | ||
min_qual=list(range(config["quality_ranges"]["from"], | ||
config["quality_ranges"]["to"], | ||
config["quality_ranges"]["step"]))), | ||
S1L3 = expand("results/caller_comparison_iGenVar_only/eval/S1L3/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt", | ||
min_qual=list(range(config["quality_ranges"]["from"], | ||
config["quality_ranges"]["to"], | ||
config["quality_ranges"]["step"]))), | ||
S2L3 = expand("results/caller_comparison_iGenVar_only/eval/S2L3/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt", | ||
min_qual=list(range(config["quality_ranges"]["from"], | ||
config["quality_ranges"]["to"], | ||
config["quality_ranges"]["step"]))), | ||
L1 = expand("results/caller_comparison_iGenVar_only/eval/L1/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt", | ||
min_qual=list(range(config["quality_ranges"]["from"], | ||
config["quality_ranges"]["to"], | ||
config["quality_ranges"]["step"]))), | ||
L2 = expand("results/caller_comparison_iGenVar_only/eval/L2/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt", | ||
min_qual=list(range(config["quality_ranges"]["from"], | ||
config["quality_ranges"]["to"], | ||
config["quality_ranges"]["step"]))), | ||
L3 = expand("results/caller_comparison_iGenVar_only/eval/L3/DUP_as_INS.min_qual_{min_qual}/pr_rec.txt", | ||
min_qual=list(range(config["quality_ranges"]["from"], | ||
config["quality_ranges"]["to"], | ||
config["quality_ranges"]["step"]))) | ||
output: | ||
S1 = temp("results/caller_comparison_iGenVar_only/eval/S1.DUP_as_INS.all_results.txt"), | ||
S2 = temp("results/caller_comparison_iGenVar_only/eval/S2.DUP_as_INS.all_results.txt"), | ||
S1L1 = temp("results/caller_comparison_iGenVar_only/eval/S1L1.DUP_as_INS.all_results.txt"), | ||
S2L1 = temp("results/caller_comparison_iGenVar_only/eval/S2L1.DUP_as_INS.all_results.txt"), | ||
S1L2 = temp("results/caller_comparison_iGenVar_only/eval/S1L2.DUP_as_INS.all_results.txt"), | ||
S2L2 = temp("results/caller_comparison_iGenVar_only/eval/S2L2.DUP_as_INS.all_results.txt"), | ||
S1L3 = temp("results/caller_comparison_iGenVar_only/eval/S1L3.DUP_as_INS.all_results.txt"), | ||
S2L3 = temp("results/caller_comparison_iGenVar_only/eval/S2L3.DUP_as_INS.all_results.txt"), | ||
L1 = temp("results/caller_comparison_iGenVar_only/eval/L1.DUP_as_INS.all_results.txt"), | ||
L2 = temp("results/caller_comparison_iGenVar_only/eval/L2.DUP_as_INS.all_results.txt"), | ||
L3 = temp("results/caller_comparison_iGenVar_only/eval/L3.DUP_as_INS.all_results.txt"), | ||
all = "results/caller_comparison_iGenVar_only/eval/DUP_as_INS.all_results.txt" | ||
threads: 1 | ||
run: | ||
shell("cat {input.S1} > {output.S1}") | ||
shell("cat {input.S2} > {output.S2}") | ||
shell("cat {input.S1L1} > {output.S1L1}") | ||
shell("cat {input.S2L1} > {output.S2L1}") | ||
shell("cat {input.S1L2} > {output.S1L2}") | ||
shell("cat {input.S2L2} > {output.S2L2}") | ||
shell("cat {input.S1L3} > {output.S1L3}") | ||
shell("cat {input.S2L3} > {output.S2L3}") | ||
shell("cat {input.L1} > {output.L1}") | ||
shell("cat {input.L2} > {output.L2}") | ||
shell("cat {input.L3} > {output.L3}") | ||
shell(""" | ||
cat {output.S1} {output.S2} {output.S1L1} {output.S2L1} \ | ||
{output.S1L2} {output.S2L2} {output.S1L3} {output.S2L3} \ | ||
{output.L1} {output.L2} {output.L3} > {output.all} | ||
""") |
Oops, something went wrong.