-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_MCaller.R
128 lines (102 loc) · 5.55 KB
/
run_MCaller.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env Rscript
print("Running Metacaller..")
options(warn=-1)
# check libraries
libraries <- c("stringr", "reshape2", "reshape", "tools", "optparse")
for(j in libraries){
#print(j)
status<-j %in% rownames(installed.packages())
if(status == TRUE){
suppressMessages(library(j, character.only=TRUE))
} else{
install.packages(j)
suppressMessages(library(j, character.only=TRUE))
}
}
# arguments
option_list <- list(
make_option(c("-E", "--folder_eric"), type="character", default=NULL, help="EricScript folder",
metavar = "character"),
make_option(c("-J", "--folder_jaffa"), type="character", default=NULL,
help="JAFFA folder", metavar="character"),
make_option(c("-F", "--folder_fc"), type="character", default=NULL, help="FusionCatcher folder",
metavar = "character"),
make_option(c("-o", "--out_folder"), type="character", default=NULL,
help="out folder", metavar = "character")
)
opt_parser <- OptionParser(option_list=option_list);
opt <- parse_args(opt_parser)
if (length(opt) <= 4){
print_help(opt_parser)
stop("All arguments must be provided", call.=FALSE)
}
# results data
eric_dir <- opt$folder_eric
jaffa_dir <- opt$folder_jaffa
fc_dir <- opt$folder_fc
out_folder <- opt$out_folder
# create sample sheet
eric_samplesheet <- list.files(eric_dir)
jaffa_samplesheet <- list.files(jaffa_dir)
fc_samplesheet <- list.files(fc_dir)
setwd(out_folder)
for(i in 1:length(eric_samplesheet)){
### EricScrip filtering and table rearanging ###
eric_results <- read.table(paste0(eric_dir, "/", eric_samplesheet[i]), sep="\t", quote = "", header = T)
# filtering to only high confidence fusions ES > 0.90
eric_results <- eric_results[as.numeric(as.character(eric_results$EricScore)) > 0.90,]
eric_results <- eric_results[-grep("Unable to predict breakpoint position", eric_results$Breakpoint1),]
eric_results <- eric_results[-grep("Unable to predict breakpoint position", eric_results$Breakpoint2),]
#Extract sample name
sample_name<-basename(paste0(eric_samplesheet[i]))
sample_string<-gsub("_.*","", sample_name)
### JAFFA filtering and table rearanging ###
jaffa_results <- read.table(paste0(jaffa_dir, "/", jaffa_samplesheet[i]), sep="\t", quote = "", header = T)
# filtering to only high confidence fusions
jaffa_results <- jaffa_results[grep("HighConfidence",jaffa_results[,17]),]
### FusionCatcher filtering a table rearanging ###
fusioncatcher_results <- read.table(paste0(fc_dir, "/", fc_samplesheet[i]), sep="\t", quote = "", header = T)
# create filtered data frames for individual callers
caller1 <- data.frame(sample=sample_string, gene1=eric_results$GeneName1, position1 = eric_results$Breakpoint1,
chromosome1=eric_results$chr1,
gene2=eric_results$GeneName2, position2 = eric_results$Breakpoint2,
chromosome2=eric_results$chr2,
tool="eric")
caller1 <-caller1[!duplicated(caller1[,c(2,4,5,7)]),]
caller2 <- data.frame(sample=sample_string, gene1=fusioncatcher_results[,1], position1 = sub('.*\\:', '', gsub('.{2}$', '',fusioncatcher_results[,9])),
chromosome1=gsub(":", "",str_sub(fusioncatcher_results[,9],1,2)),
gene2=fusioncatcher_results[,2], position2 = sub('.*\\:', '', gsub('.{2}$', '',fusioncatcher_results[,10])),
chromosome2=gsub(":", "",str_sub(fusioncatcher_results[,10],1,2)),
tool="fusioncatcher")
caller2 <-caller2[!duplicated(caller2[,c(2,4,5,7)]),]
#split fusion genes from jaffa results to two diferent columns
genes<-str_split_fixed(jaffa_results[,2], ":", 2)
gene1<-gsub("^.","",genes[,1])
gene2<-gsub(".$","",genes[,2])
caller3 <- data.frame(sample=sample_string, gene1=gene1, position1 = jaffa_results[,4],
chromosome1=gsub('"', "",gsub("chr","",jaffa_results[,3])),
gene2=gene2, position2 = jaffa_results[,7],
chromosome2=gsub('"', "",gsub("chr","",jaffa_results[,6])),
tool="jaffa")
caller3 <-caller3[!duplicated(caller3[,c(2,4,5,7)]),]
# paste
callers <-Reduce(function(x, y) merge(x, y, all=TRUE), list(caller1, caller2, caller3))
header<-data.frame(sample="sample", gene1="gene1", gene1_position="gene1_pos", chromosome1="chr1", gene2="gene2", gene2_position="gene2_pos", chromosome2="chr", tool="tool")
file <- paste0(out_folder, "/Sample",i,"_allmethods_results_pos.txt")
file_out <- paste("Sample",i,"_method_overlap_pos.txt", sep="")
write.table(header, file="header", sep="\t",row.names=F, quote=F)
write.table(callers, file=file, sep="\t",row.names=F, quote=F)
# metacaller
command_awk <- paste("awk 'n=x[$2,$4,$5,$7]{print n", '"\\n\"', "$0;} {x[$2,$4,$5,$7]=$0;}'", sep = "")
system(paste(command_awk, file, ">", file_out, sep=" "))
}
system(paste("head -n 1 header > header1"))
system(paste("cat header1 *method_overlap_pos.txt >> all_samples_results_metacaller.txt"))
system(paste("rm header header1 *_allmethods_results_pos.txt *method_overlap_pos.txt"))
# create final table
data_res<-read.table("all_samples_results_metacaller.txt", header = T, sep = "\t")
test<-melt(data_res, id_var=c("gene1", "gene2", "chromosome1", "chromosome2", "sample"), measure.vars = "tool")
final_df<-suppressMessages(dcast(test, sample+gene1+chromosome1+gene2+chromosome2~value, value.var= "variable"))
## change values from 2 to 1!!!
write.table(final_df, file="Metacaller_final_table.txt", sep="\t", quote=F, row.names = F)
print("..done")