diff --git a/.gitignore b/.gitignore index 3707c4b1..43016293 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,7 @@ /benchmark /dev *.RData -*.RHistory +*.Rhistory /commit.txt /notes.txt diff --git a/results/bubble_calling/cortex/final.sh b/results/bubble_calling/cortex/final.sh new file mode 100755 index 00000000..6d4e135f --- /dev/null +++ b/results/bubble_calling/cortex/final.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +set -euo pipefail + +CTXDIR=../../.. +VCFHEADER=$CTXDIR/scripts/bash/vcf-header +#VCFSORT=$CTXDIR/scripts/bash/vcf-sort +VCFSORT=./vcf-sort +BCFTOOLS=$CTXDIR/libs/bcftools/bcftools +BGZIP=$CTXDIR/libs/htslib/bgzip +VCFREF=~/c/vcf-hack/bin/vcfref + +# Files +REF=../../data/chr22/chr22_17M_18M.fa +IN=cortex_run/vcfs/chr22_17M_18M_union_BC_calls_k31.decomp.vcf + +# Add '##contig=' +# to header, and fix an INFO field +( $VCFHEADER $IN | \ + grep -v '^##contig' | \ + grep -v '^#CHROM' | \ + sed 's/, Description=/,Description=/g'; + echo '##INFO='; + echo '##contig='; + $VCFHEADER $IN | grep '^#CHROM' ) > new_header.txt + +# Put new header on and filter ref mismatches +( cat new_header.txt; + $VCFREF -s $IN $REF | grep -v '^#' | sort -k1,1d -k2,2n ) > cortex.sort.vcf + +$BCFTOOLS norm --remove-duplicates --fasta-ref $REF --multiallelics +both cortex.sort.vcf > cortex.norm.vcf +$BGZIP cortex.norm.vcf +$BCFTOOLS index cortex.norm.vcf.gz diff --git a/results/bubble_calling/cortex/run.sh b/results/bubble_calling/cortex/run.sh new file mode 100755 index 00000000..736d39c0 --- /dev/null +++ b/results/bubble_calling/cortex/run.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +CORTEXDIR=~/cortex/releases/CORTEX_release_v1.0.5.21 +CTXDIR=../../../ + +RUNCALLS=$CORTEXDIR/scripts/calling/run_calls.pl +CORTEX=$CORTEXDIR/bin/cortex_var_31_c1 +MCCORTEX=$CTXDIR/bin/mccortex31 +STAMPY=/apps/well/stampy/1.0.23-py2.6/stampy.py +REF=$(readlink -f ../../data/chr22/chr22_17M_18M.fa) +VCFTOOLSDIR=~/bioinf/vcftools_0.1.12b/ + + +$RUNCALLS \ +--first_kmer 31 \ +--last_kmer 31 \ +--kmer_step 2 \ +--fastaq_index samples.txt \ +--auto_cleaning yes \ +--bc yes \ +--pd no \ +--outdir cortex_run \ +--outvcf chr22_17M_18M \ +--ploidy 2 \ +--stampy_hash ref/chr22_17M_18M \ +--stampy_bin $STAMPY \ +--list_ref_fasta ref/ref.falist \ +--refbindir ref/ \ +--genome_size 1000000 \ +--qthresh 5 \ +--mem_height 20 --mem_width 100 \ +--vcftools_dir $VCFTOOLSDIR \ +--do_union yes \ +--ref CoordinatesAndInCalling \ +--workflow independent \ +--logfile runcalls.log diff --git a/results/bubble_calling/cortex/setup.sh b/results/bubble_calling/cortex/setup.sh new file mode 100755 index 00000000..5e694430 --- /dev/null +++ b/results/bubble_calling/cortex/setup.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -euo pipefail + +CORTEXDIR=~/cortex/releases/CORTEX_release_v1.0.5.21 +CTXDIR=../../../ + +RUNCALLS=$CORTEXDIR/scripts/calling/run_calls.pl +CORTEX=$CORTEXDIR/bin/cortex_var_31_c1 +MCCORTEX=$CTXDIR/bin/mccortex31 +STAMPY=/apps/well/stampy/1.0.23-py2.6/stampy.py +REF=$(readlink -f ../../data/chr22/chr22_17M_18M.fa) +VCFTOOLSDIR=~/bioinf/vcftools_0.1.12b/ + +mkdir -p ref +echo $REF > ref/ref.falist + +# Make stampy hash +$STAMPY -G ref/chr22_17M_18M $REF +$STAMPY -g ref/chr22_17M_18M -H ref/chr22_17M_18M + +# Build reference graph file +#$CORTEX --mem_height 20 --mem_width 100 --kmer_size 31 --sample_name REF --se_list ref/ref.falist --dump_binary ref/ref.k31.ctx +$MCCORTEX build -k 31 -s REF -1 $REF ref/ref.k31.ctx >& ref/ref.k31.ctx.log + +(readlink -f ../reads/chrom0.30X.1.fa.gz; + readlink -f ../reads/chrom1.30X.1.fa.gz) > reads.1.falist +(readlink -f ../reads/chrom0.30X.2.fa.gz; + readlink -f ../reads/chrom1.30X.2.fa.gz) > reads.2.falist + +printf "MrSample\t.\t%s\t%s\n" reads.1.falist reads.2.falist > samples.txt