forked from cran/PopGenome
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreadVCFchunkHap.R
84 lines (57 loc) · 1.85 KB
/
readVCFchunkHap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
readVCFchunkHap <- function(vcffile, samplenames=FALSE){
# if(file.exists("SNPRObjects")){unlink("SNPRObjects",recursive=TRUE)}
# Get the sample names !
vcf_handle <- file(vcffile)
open(vcf_handle)
while(1){
line <- readLines(vcf_handle,n=1)
patt <- substr(line,1,2)
if(patt!="##"){break}
}
line <- strsplit(line,"\t")[[1]]
individuals <- line[10:length(line)]
close(vcf_handle)
#
# End of get individuals
# specify colClasses
spec <- c("NULL","integer","NULL","character",
"character", rep("NULL",4),rep("character",length(individuals)))
vcf <- read.table(vcffile, sep="\t", colClasses=spec)
# Save infos from vcf chunk
pos <- vcf[,1]
ref <- vcf[,2]
alt <- vcf[,3]
ind <- vcf[,4:dim(vcf)[2]]
# FIRST HAPLOTYPE MATRIX !!! #############################################################################
ind_MATRIX <- matrix(,dim(ind)[2],dim(ind)[1])
# fill individual matrix
op <- options(warn = (-1)) # keine Warnmeldungen !
for( xx in 1:dim(ind)[2] ){
ind_MATRIX[xx,] <- as.integer(substr(ind[,xx], 1, 1)) # 1/1
}
options(op) # Warnmeldungen wieder an !
ind_MATRIX <- ind_MATRIX + 1
# create ALT MATRIX
ALTlist <- strsplit(alt,",")
n.val <- sapply(ALTlist,function(x){return(length(x))})
alt_MATRIX <- matrix(,length(pos),max(n.val))
for(xx in 1:length(ALTlist)){
alt_MATRIX[xx,1:length(ALTlist[[xx]])] <- ALTlist[[xx]]
}
# MAP Matrix
MAP_MATRIX <- cbind(ref,alt_MATRIX)
Code_matrix <- matrix(,dim(ind_MATRIX)[1], length(pos))
for(xx in 1:dim(MAP_MATRIX)[1]){
Code_matrix[,xx] <- MAP_MATRIX[xx,][ind_MATRIX[,xx]]
}
Code_matrix <- .Call("code_nucs",Code_matrix)
# ----------------------------------------------
# Set the rownames
if(samplenames[1]!=FALSE){
rownames(Code_matrix) <- samplenames
}else{
rownames(Code_matrix) <- individuals
}
o_b_j_sub <- list(matrix=Code_matrix,reference=NaN,positions=pos)
return(o_b_j_sub)
}