diff --git a/results/klebsiella/kleb_pneumoniae/large_events/large-event-mappings.py b/results/klebsiella/kleb_pneumoniae/large_events/large-event-mappings.py index 9a1070fa..8e2ebba0 100755 --- a/results/klebsiella/kleb_pneumoniae/large_events/large-event-mappings.py +++ b/results/klebsiella/kleb_pneumoniae/large_events/large-event-mappings.py @@ -7,13 +7,14 @@ import pysam import getopt -# Update sys.path to include mccortex/scripts/python/ +# Update sys.path to include paths relative to the script dir def add_rel_path_to_sys_path(rel_inc_path): import sys,os,os.path scriptdir = os.path.dirname(os.path.realpath(__file__)) incdir = os.path.realpath(scriptdir+'/'+rel_inc_path) sys.path.append(incdir) +# include mccortex/scripts/python/ add_rel_path_to_sys_path('../../../../scripts/python') import mccortex @@ -230,10 +231,13 @@ def main(args): for t in events: write_paired_alignments(good1fh,good2fh,[t[0],t[1]],[t[2],t[3]]) - # Write histogram of sizes + # Write histogram of sizes, removing flank sizes histfh = open(outdir+'/stats.txt', 'w') for t in events: - print("%i\t%i" % (len(t[0].seq),len(t[1].seq)), file=histfh) + fields = t[0].query_name.split(':') + if len(fields) < 3: raise SystemExit("Name missing flank sizes: "+t[0].query_name) + flanks = int(fields[1]) + int(fields[2]) + print("%i\t%i" % (len(t[0].seq)-flanks, len(t[1].seq)-flanks), file=histfh) histfh.close() print("after rmdup, wrote %i events" % (len(events))) diff --git a/results/klebsiella/kleb_pneumoniae/large_events/large-events-plot.R b/results/klebsiella/kleb_pneumoniae/large_events/large-events-plot.R index bfe5fc65..30c61cfe 100755 --- a/results/klebsiella/kleb_pneumoniae/large_events/large-events-plot.R +++ b/results/klebsiella/kleb_pneumoniae/large_events/large-events-plot.R @@ -4,7 +4,13 @@ # Dot plot of ref allele length vs sample allele length # -file='bubbles50K/stats.txt' +args <- commandArgs(trailingOnly=TRUE) +if(length(args) != 1) { + stop("Usage: Rscript --vanilla large-events-plot.R \n") +} + +file <- args[1] +#file <- 'bubbles50K/stats.txt' title <- expression(paste(italic('klebsiella pneumoniae'),' large event sizes')) xlabel <- 'Reference length (kbp)' @@ -13,8 +19,12 @@ ylabel <- 'Sample length (kbp)' r <- read.table(file,sep='\t',head=F,comment.char='#') r <- r / 1000 +# Get maximum and round to nearest 5 +lim <- max(r[,1],r[,2]) +lim <- floor((ceiling(lim)+4)/5)*5 + pdf(file='kleb_large_events_R.pdf', width=6, height=6) -plot(r, xlab=xlabel, ylab=ylabel, main=title, xlim=c(0,35), ylim=c(0,35)) +plot(r, xlab=xlabel, ylab=ylabel, main=title, xlim=c(0,lim), ylim=c(0,lim)) dev.off() # With ggplot @@ -27,7 +37,7 @@ df <- data.frame(ref=r[,1], sample=r[,2]) p <- ggplot(df, aes(x=ref, y=sample)) + geom_point(shape=1) + - xlim(0,35) + ylim(0,35) + + xlim(0,lim) + ylim(0,lim) + ggtitle(title) + xlab(xlabel) + ylab(ylabel) ggsave(p, file='kleb_large_events_ggplot.pdf', width=6, height=6) diff --git a/scripts/analysis/calls-covg.pl b/scripts/analysis/calls-covg.pl index cc7a502e..d3aade4b 100755 --- a/scripts/analysis/calls-covg.pl +++ b/scripts/analysis/calls-covg.pl @@ -108,13 +108,13 @@ sub print_usage if(!$c_alt_missing_covg) { $alt_missing_edges += ($c_alt_edges == EDGES_MISSING); - $alt_resolve_plain += ($c_alt_edges & EDGES_RESOLVABLE_PLAIN_BOTH); + $alt_resolve_plain += ($c_alt_edges & EDGES_RESOLVABLE_PLAIN_BOTH ? 1 : 0); $alt_resolve_links += ($c_alt_edges == EDGES_RESOLVABLE_LINKS); $alt_unresolvable += ($c_alt_edges == EDGES_UNRESOLVABLE); } $ref_missing_edges += ($c_ref_edges == EDGES_MISSING); - $ref_resolve_plain += ($c_ref_edges & EDGES_RESOLVABLE_PLAIN_BOTH); + $ref_resolve_plain += ($c_ref_edges & EDGES_RESOLVABLE_PLAIN_BOTH ? 1 : 0); $ref_resolve_links += ($c_ref_edges == EDGES_RESOLVABLE_LINKS); $ref_unresolvable += ($c_ref_edges == EDGES_UNRESOLVABLE); diff --git a/scripts/mccortex-bubbles-to-contigs.pl b/scripts/mccortex-bubbles-to-contigs.pl index d0347e85..187a6991 100755 --- a/scripts/mccortex-bubbles-to-contigs.pl +++ b/scripts/mccortex-bubbles-to-contigs.pl @@ -38,8 +38,10 @@ sub print_usage $flank5p_nkmers, $flank3p_nkmers, $branchlens, $callid) = $cb->next(); if(!defined($seq5p)) { last; } + my ($len5p,$len3p) = (length($seq5p), length($seq3p)); + for(my $i = 0; $i < @$branches; $i++) { - print ">$callid.branch$i\n"; + print ">$callid.branch$i:$len5p:$len3p\n"; print $seq5p.$branches->[$i].$seq3p."\n"; } }