-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_db.py
78 lines (65 loc) · 2.29 KB
/
create_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
from config import RAW_DATA_INFO, UPDATE, MAX_THREADS
from workflow.steps import work_flow
# Create structure
#
# Directory layout built under the current working directory (BASE):
#   sqlite/
#   data/
#   data/raw/<unit>/   one per entry in `units`
#   data/tmp/<unit>/   one per entry in `units`
#   data/output/
BASE = os.getcwd()
SPECIES = ['sus']
units = ['annotation', 'id_mapper', 'go', 'kegg', 'interpro', 'mesh', 'reactome', 'msigdb', 'tftargets']
path_list = [
    os.path.join(BASE, 'sqlite'),
    os.path.join(BASE, 'data'),
]
# path_list[1] is the 'data' directory; everything below hangs off it.
path_list.extend([os.path.join(path_list[1], 'raw', x) for x in units])
path_list.extend([os.path.join(path_list[1], 'tmp', x) for x in units])
path_list.extend([os.path.join(path_list[1], 'output')])
print(f'Now try to create {len(path_list)} paths for the DB...')
print()
for item in path_list:
    # Attempt the creation and react to the outcome instead of checking
    # os.path.exists() first: the check-then-create form raises if the
    # directory appears between the check and the makedirs() call.
    try:
        os.makedirs(item)
    except FileExistsError:
        print(f'{item} already exists.')
    else:
        print(f'created path {item}')
print()
# Drive the build pipeline stage by stage. Every stage prints a framed
# banner, runs its step (when the call is enabled), then prints a
# completion marker.


def _banner(title, rule="--------------------------------"):
    """Print *title* with *rule* on the line above and the line below."""
    print(rule)
    print(title)
    print(rule)


def _done(message='DONE!\n'):
    """Print the completion *message* followed by the closing rule."""
    print(message)
    print("------------------------------\n\n")


_banner(" INITIALIZING A NEW WORK FLOW ")
current = work_flow(RAW_DATA_INFO, UPDATE, BASE, SPECIES, MAX_THREADS)
_done('DONE!')

# 1. Download raw data from source, only when the purpose is to UPDATE.
if UPDATE:
    _banner(" DOWNLOADING FILES FROM SOURCE ")
    current.download()
    _done()

# 2. Parse raw data and generate preliminary tables.
# NOTE: the parse call is currently commented out; the banner and the
# completion marker are still printed.
_banner("PARSING CURRENT VERSION RAW DATA")
# current.parse()
_done()

# 3. Assign UID(ek_id) and generate schema.
_banner(" GENERATING DATABASE SCHEMA ")
current.migrate()
_done()

# 4. Process tf targets.
# NOTE: the tftarget call is currently commented out; the banner and the
# completion marker are still printed.
_banner(" PROCESSING TFTARGETS ")
# current.tftarget()
_done()

# 5. Create a portable sqlite database.
# NOTE: the populate call is currently commented out; the banner and the
# completion marker are still printed.
_banner(" Create a portable sqlite database", rule="----------------------------------")
# current.populate()
_done()