-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathannotateExAC.extracter.py
42 lines (32 loc) · 1.17 KB
/
annotateExAC.extracter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/local/Python/2.7.8/bin
#
# annotateExAC.extracter.py
# This script pulls the annotation info from a ExAC entry
# INPUTS: file with single ExAC entry
# OUTPUTS: file with homozygote info for each variant in that ExAC entry
#
# last modified on: 09/21/15
# last modified by: Mike Warburton
import sys, re
if len(sys.argv) != 5:
print("\nERROR: Incorrect number of arguments\n")
sys.exit(1)
outputFileName = sys.argv[1]
refSeq = sys.argv[2]
altSeqs = sys.argv[3].split(',')
annos = sys.argv[4]
with open(outputFileName, 'w') as outputFile:
homozygACs = re.search(r"AC_Hom=(\d+(,\d+)*)", annos)
if homozygACs is not None:
homozygAClist = homozygACs.group(1).split(',')
if len(altSeqs) != len(homozygAClist):
print("\nERROR: The number of allele counts in the ExAC homozygote annotation does not match the number of variants\n")
sys.exit(1)
for i, alt in enumerate(altSeqs):
ref = refSeq
while ref[-1] == alt[-1] and len(ref) > 1 and len(alt) > 1:
ref = ref[:-1]
alt = alt[:-1]
outputFile.write("\t".join([ref, alt, homozygAClist[i]]) + '\n')
else:
print("\nERROR: The AC_hom annotation was not found in the ExAC database for the position\n")