Skip to content

Commit

Permalink
Added cortex bubble calling comparison
Browse files Browse the repository at this point in the history
  • Loading branch information
noporpoise committed Apr 28, 2015
1 parent 916d92c commit f04d5dc
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
/benchmark
/dev
*.RData
*.RHistory
*.Rhistory

/commit.txt
/notes.txt
Expand Down
33 changes: 33 additions & 0 deletions results/bubble_calling/cortex/final.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash

set -euo pipefail

CTXDIR=../../..
VCFHEADER=$CTXDIR/scripts/bash/vcf-header
#VCFSORT=$CTXDIR/scripts/bash/vcf-sort
VCFSORT=./vcf-sort
BCFTOOLS=$CTXDIR/libs/bcftools/bcftools
BGZIP=$CTXDIR/libs/htslib/bgzip
VCFREF=~/c/vcf-hack/bin/vcfref

# Files
REF=../../data/chr22/chr22_17M_18M.fa
IN=cortex_run/vcfs/chr22_17M_18M_union_BC_calls_k31.decomp.vcf

# Add '##contig=<ID=chr22_17M_18M,length=1000000,assembly=hg19>'
# to header, and fix an INFO field
( $VCFHEADER $IN | \
grep -v '^##contig' | \
grep -v '^#CHROM' | \
sed 's/, Description=/,Description=/g';
echo '##INFO=<ID=KMER,Number=1,Type=Integer,Description="Kmer used for calling">';
echo '##contig=<ID=chr22_17M_18M,length=1000000,assembly=hg19>';
$VCFHEADER $IN | grep '^#CHROM' ) > new_header.txt

# Put new header on and filter ref mismatches
( cat new_header.txt;
$VCFREF -s $IN $REF | grep -v '^#' | sort -k1,1d -k2,2n ) > cortex.sort.vcf

$BCFTOOLS norm --remove-duplicates --fasta-ref $REF --multiallelics +both cortex.sort.vcf > cortex.norm.vcf
$BGZIP cortex.norm.vcf
$BCFTOOLS index cortex.norm.vcf.gz
36 changes: 36 additions & 0 deletions results/bubble_calling/cortex/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash

CORTEXDIR=~/cortex/releases/CORTEX_release_v1.0.5.21
CTXDIR=../../../

RUNCALLS=$CORTEXDIR/scripts/calling/run_calls.pl
CORTEX=$CORTEXDIR/bin/cortex_var_31_c1
MCCORTEX=$CTXDIR/bin/mccortex31
STAMPY=/apps/well/stampy/1.0.23-py2.6/stampy.py
REF=$(readlink -f ../../data/chr22/chr22_17M_18M.fa)
VCFTOOLSDIR=~/bioinf/vcftools_0.1.12b/


$RUNCALLS \
--first_kmer 31 \
--last_kmer 31 \
--kmer_step 2 \
--fastaq_index samples.txt \
--auto_cleaning yes \
--bc yes \
--pd no \
--outdir cortex_run \
--outvcf chr22_17M_18M \
--ploidy 2 \
--stampy_hash ref/chr22_17M_18M \
--stampy_bin $STAMPY \
--list_ref_fasta ref/ref.falist \
--refbindir ref/ \
--genome_size 1000000 \
--qthresh 5 \
--mem_height 20 --mem_width 100 \
--vcftools_dir $VCFTOOLSDIR \
--do_union yes \
--ref CoordinatesAndInCalling \
--workflow independent \
--logfile runcalls.log
31 changes: 31 additions & 0 deletions results/bubble_calling/cortex/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash

set -euo pipefail

CORTEXDIR=~/cortex/releases/CORTEX_release_v1.0.5.21
CTXDIR=../../../

RUNCALLS=$CORTEXDIR/scripts/calling/run_calls.pl
CORTEX=$CORTEXDIR/bin/cortex_var_31_c1
MCCORTEX=$CTXDIR/bin/mccortex31
STAMPY=/apps/well/stampy/1.0.23-py2.6/stampy.py
REF=$(readlink -f ../../data/chr22/chr22_17M_18M.fa)
VCFTOOLSDIR=~/bioinf/vcftools_0.1.12b/

mkdir -p ref
echo $REF > ref/ref.falist

# Make stampy hash
$STAMPY -G ref/chr22_17M_18M $REF
$STAMPY -g ref/chr22_17M_18M -H ref/chr22_17M_18M

# Build reference graph file
#$CORTEX --mem_height 20 --mem_width 100 --kmer_size 31 --sample_name REF --se_list ref/ref.falist --dump_binary ref/ref.k31.ctx
$MCCORTEX build -k 31 -s REF -1 $REF ref/ref.k31.ctx >& ref/ref.k31.ctx.log

(readlink -f ../reads/chrom0.30X.1.fa.gz;
readlink -f ../reads/chrom1.30X.1.fa.gz) > reads.1.falist
(readlink -f ../reads/chrom0.30X.2.fa.gz;
readlink -f ../reads/chrom1.30X.2.fa.gz) > reads.2.falist

printf "MrSample\t.\t%s\t%s\n" reads.1.falist reads.2.falist > samples.txt

0 comments on commit f04d5dc

Please sign in to comment.