forked from cran/PopGenome
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprobabilities.R
173 lines (148 loc) · 5.37 KB
/
probabilities.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
###############################################################################
#
# FUNCTION: calc_probabilities, calc_quantile
#
# This function compares the observed value with the data which was
# gained by coalescent simulation and calculates probabilties for the occurence
# of the observed values under the hypothesis the coalescent simulation was run
#
#
# FUNCTION CALLS:
#
#
# PARAMETERS:
# cloci: current loci
#
# niter: number of iterations for current loci
#
# csStats: a matrix containing summary statistics for current loci
#
# obsVal: a matrix with observed value, given by the user
#
# lData: an object of class locStats, for saving all computed data
#
#
# RETURN VALUES: an object of class locstats, available Slots:
# - nsam: number of samples for each iteration
#
# - niter: number of iteration
#
# - theta: mutation parameter
#
# - obsVal: vector with observed values for each test
#
# - positions: position of each polymorphic site
#
# - trees: if printtree = 1, find gene tree in Newick format,
# which is used in genetree() function
#
# - seeds: random numbers used to generate samples
#
# - halplotypes: haplotypes in each iteration
#
# - stats: variety of test stats compiled a matrix
#
# - locProbLess: Prob. that simulated val. <= to observed val. P(Sim <= Obs)
#
# - locProbEqual: Prob. that simulated val = to observed val. P(Sim = Obs)
#
# - locValidIter: number of valid iteration for each test
#
# - quantiles: 13 quantiles for each test
#
#
# AUTHOR: Niroshan Nadarajah <[email protected]>
#
# LAST MODIFIED: 10/11/03
#
###############################################################################
calc_probabilities <- function(cloci , niter, csStats, obsVal, lData,npop,testNames,numTests) {
# lData beinhaltet zu cloci die statistiken fuer jede Population
ntest <- dim(csStats[[1]])[2]
# Init
init <- as.matrix(vector("list",npop))
csProbLess <- init
csProbEqual <- init
csValidIter <- init
quantiles <- init
###########################
popnames <- paste("pop",1:npop)
colnames(csProbLess) <- "cs.prob.less"
colnames(csProbEqual) <- "cs.prob.equal"
colnames(csValidIter) <- "cs.valid.iter"
colnames(quantiles) <- "quantiles"
rownames(csProbLess) <- popnames
rownames(csProbEqual) <- popnames
rownames(csValidIter) <- popnames
rownames(quantiles) <- popnames
#print(csProbLess)
for(xx in 1:npop){
# get the number of tests
# ntest <- dim(csStats)[2]-1
# set up matrix and name columns for inserting the calculated quantiles
quantiles[[xx]] <- matrix(nrow=length(quantileProbs), ncol=ntest)
colnames(quantiles[[xx]]) <- testNames
rownames(quantiles[[xx]]) <- quantileProbs
# define space to save probability value for each locus
csProbLess[[xx]] <- matrix( ncol=ntest )
csProbEqual[[xx]] <- matrix( ncol=ntest )
csValidIter[[xx]] <- matrix( ncol=ntest )
colnames(csProbLess[[xx]]) <- testNames
colnames(csProbEqual[[xx]]) <- testNames
colnames(csValidIter[[xx]]) <- testNames
rownames(csProbLess[[xx]]) <- popnames[xx]
rownames(csProbEqual[[xx]]) <- popnames[xx]
rownames(csValidIter[[xx]]) <- popnames[xx]
for (ctest in 1:ntest) {
# probability values can only be gained if observed values are present
if (!is.na(obsVal[1][1]) ) {
count <- 0
countequal <- 0
total <- 0
for (j in 1: niter) {
currStat <- as.numeric(csStats[[xx]][,ctest][j])
if (!is.na(currStat) && !is.nan(currStat) && !is.na(obsVal[[xx]][,ctest])) {
total <- total +1
# probability that simulated values be smaller or equal than the observed value. p(sim <= obs): accuracy of +- 1e-6
if ((obsVal[[xx]][,ctest] >= currStat+1e-05) || (obsVal[[xx]][,ctest] >= currStat -1e-05) ) {
count = count + 1;
}
# probability that simulated values be equal to the observed value. p(sim = obs): accuracy of +- 1e-6
if ( (obsVal[[xx]][,ctest] <= currStat+1e-05) && (obsVal[[xx]][,ctest] >= currStat-1e-05) ) {
countequal = countequal + 1;
}
}
} #for loop j : niter
#probability
if ( total > 0 ) {
csProbLess[[xx]] [ctest] <- count/total
csProbEqual[[xx]] [ctest] <- countequal/total
csValidIter[[xx]] [ctest] <- total
}
}
# calculate values for specified quantiles
qcol = calc_quantile(csStats[[xx]], ctest)
quantiles[[xx]][,ctest] <- cbind(qcol)
} #for loop ctest : ntest (number of tests performed)
}# End for npops
# add all calculated values to the appropriate slots
lData@quantiles <- as.matrix(quantiles)
if (length(obsVal) > 0) {
#colnames(csProbLess) <- testNames
[email protected] = as.matrix(csProbLess)
#colnames(csProbEqual) <- testNames
[email protected] = as.matrix(csProbEqual)
#colnames(csValidIter) <- testNames
[email protected] = as.matrix(csValidIter)
}
return(lData)
}
#------------------------------------------------------------------------------#
# calc_quantile #
#------------------------------------------------------------------------------#
## this function returns the calculated quantiles for the column specified in
## ctest. the quantile probabilties are specified in the vector quantileProbs
## which is found in the coalsim file
calc_quantile <- function(csStats, ctest) {
return(quantile(csStats[,ctest], probs = quantileProbs, na.rm = TRUE))
}