-
Notifications
You must be signed in to change notification settings - Fork 208
/
Copy pathformat_substitution_matrix.R
executable file
·32 lines (28 loc) · 1.14 KB
/
format_substitution_matrix.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/usr/bin/env Rscript
input <- file('stdin', 'r')
lines <- readLines(input)
mat <- as.matrix(read.table(textConnection(lines) , comment.char = "#", header = T))
alphabet <- rownames(mat)
alphabet <- sort(alphabet)
if ("X" %in% alphabet) {
alphabet <- c(alphabet[!(alphabet %in% c("B", "Z", "X"))], "X")
mat <- mat[alphabet, alphabet]
R <- 2**(0.5*mat)
A <- nrow(R)
p <- solve(R[-A,-A]) %*% rep(1, A-1)
avgSeqId <- sum(diag(R[-A,-A])*p*p)
} else {
alphabet <- alphabet[!(alphabet %in% c("B", "Z"))]
mat <- mat[alphabet, alphabet]
R <- 2**(0.5*mat)
A <- nrow(R)
p <- solve(R) %*% rep(1, A)
avgSeqId <- sum(diag(R)*p*p)
}
header <- paste0(lines[startsWith(lines, "#")], collapse = "\n")
p0 <- paste0("# Background (precomputed optional): ", paste(unlist(as.list(round(p, 4))), collapse = " "), " 0.00001")
# FIXME: find out how to compute lambda
lambda <- "# Lambda (precomputed optional): 0.34657"
avg <-paste0("# Avg SeqId (precomputed optional): ", round(avgSeqId, 5))
out <- paste0(capture.output(write.table(mat, quote = F, sep = "\t")), collapse = "\n")
cat(paste0(header, "\n", p0, "\n", lambda, "\n", avg, "\n\t", out, "\n"))