-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_fws.py
92 lines (74 loc) · 3.66 KB
/
test_fws.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import numpy as np
from importlib.resources import files
from fws import fws, maf_from_AD, maf_from_GT, exphet_from_maf, get_bin_indices
def test_fws_AD():
    """Compare the first five values returned by fws() for AD.npy with stored references."""
    ad_file = files('testdata').joinpath('AD.npy')
    AD_testdata = np.load(ad_file)
    observed = fws(AD_testdata)[0:5]
    expected = [0.99514152, 0.99817753, 0.99540397, 0.99217399, 0.99927452]
    # Values are compared after rounding both sides to five decimal places.
    for idx, want in enumerate(expected):
        assert round(observed[idx], 5) == round(want, 5)
def test_fws_GT():
    """Compare the first five values returned by fws(..., "GT") for GT.npy with stored references."""
    gt_file = files('testdata').joinpath('GT.npy')
    GT_testdata = np.load(gt_file)
    observed = fws(GT_testdata, "GT")[0:5]
    expected = [0.99937286, 0.99928378, 0.99932071, 0.99837554, 0.99939141]
    # Values are compared after rounding both sides to five decimal places.
    for idx, want in enumerate(expected):
        assert round(observed[idx], 5) == round(want, 5)
def test_maf_from_AD():
    """Check maf_from_AD() on a small hand-built array that includes missing data.

    Each row of the input yields one expected value. Both -1 and np.nan are
    treated as missing data; a row that is entirely missing is expected to
    give np.nan.
    """
    nan_pair = [np.nan, np.nan]
    neg_pair = [-1, -1]
    # four samples, N sites, two alleles:
    AD = np.array([
        [[10, 0], [10, 0], [0, 10], [0, 10]],
        [[10, 0], nan_pair, [0, 10], nan_pair],
        [[10, 0], neg_pair, [0, 10], neg_pair],
        [neg_pair, nan_pair, neg_pair, nan_pair],  # all missing, should return np.nan
    ])
    mafs = maf_from_AD(AD)
    desired_result = np.array([0.5, 0.5, 0.5, np.nan])
    for row, expected in enumerate(desired_result):
        observed = mafs[row]
        # np.nan == np.nan evaluates to False, so NaN expectations are
        # checked with np.isnan instead of ==.
        if np.isnan(expected):
            assert np.isnan(observed)
        else:
            assert observed == expected
def test_maf_from_GT():
    """Check maf_from_GT() on a small hand-built array that includes missing data.

    Each row of the input yields one expected value. Both -1 and np.nan are
    treated as missing data; a row that is entirely missing is expected to
    give np.nan. The last two rows use the value 2 in place of 1.
    """
    nan_pair = [np.nan, np.nan]
    neg_pair = [-1, -1]
    GT = np.array([
        [[1, 1], [1, 1], [0, 0], [0, 0]],
        [[1, 1], nan_pair, [0, 0], nan_pair],
        [[1, 1], neg_pair, [0, 0], neg_pair],
        [neg_pair, nan_pair, neg_pair, nan_pair],  # all missing, should return np.nan
        [[2, 2], [2, 2], [2, 2], [0, 0]],
        [[2, 0], [2, 0], [2, 0], [2, 0]],
    ])
    mafs = maf_from_GT(GT)
    desired_result = np.array([0.5, 0.5, 0.5, np.nan, 0.25, 0.5])
    for row, expected in enumerate(desired_result):
        observed = mafs[row]
        # np.nan == np.nan evaluates to False, so NaN expectations are
        # checked with np.isnan instead of ==.
        if np.isnan(expected):
            assert np.isnan(observed)
        else:
            assert observed == expected
def test_exphet_from_maf():
    """Check exphet_from_maf() on plain MAFs and on input containing NaN.

    The expected values equal 2 * maf * (1 - maf) for each input, and a NaN
    input is expected to give a NaN output.
    """
    mafs = np.array([0.5, 0.25, 0.3])
    eh = exphet_from_maf(mafs)
    assert len(eh) == 3
    # Tolerance-based comparison: exact float equality against 0.42 depends on
    # the order of arithmetic inside the implementation.
    np.testing.assert_allclose(eh, [0.5, 0.375, 0.42], rtol=0, atol=1e-12)

    mafs = np.array([0.1, np.nan])
    eh = exphet_from_maf(mafs)
    desired_result = np.array([0.18, np.nan])
    assert len(eh) == 2
    # Compare every entry, including the NaN one. The loop used here before
    # only visited index 0, so the NaN expectation was never exercised.
    # atol=5e-4 corresponds to the earlier "equal after rounding to three
    # decimals" check around 0.18; equal_nan makes NaN match NaN.
    np.testing.assert_allclose(eh, desired_result, rtol=0, atol=5e-4, equal_nan=True)
def test_bin_indices():
    """Check that get_bin_indices() assigns input positions to the expected bins.

    The input holds eight finite values followed by two NaN entries
    (positions 8 and 9); the NaN positions must not appear in any bin.
    """
    mafs = np.array([0.5, 0.25, 0.3, 0.1, 0.001, 0.01, 0.025, 0.35, np.nan, np.nan])  # np.nan shouldn't appear in the output
    bin_indices = get_bin_indices(mafs)
    assert len(bin_indices) == 10  # because there are ten bins between 0 and 0.5 in increments of 0.05
    desired_result = [
        np.array([4, 5, 6]),
        np.array([3]),
        np.array([]),
        np.array([]),
        np.array([1]),
        np.array([2]),
        np.array([]),
        np.array([7]),
        np.array([]),
        np.array([0]),
    ]
    for i, expected in enumerate(desired_result):
        # assert_array_equal checks shape as well as content. A plain
        # `(a == b).all()` broadcasts, so an empty array compared with a
        # one-element array yields an empty result whose .all() is True,
        # letting a misplaced index pass unnoticed.
        np.testing.assert_array_equal(
            bin_indices[i], expected, err_msg=f"unexpected indices in bin {i}"
        )