-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharm.py
64 lines (59 loc) · 2.25 KB
/
arm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# ARM by Ken Hall 8/4/2017
# Acronym reclamation module (ARM) creates a dictionary of acronyms for a txt file
# Requires one input argument: path to txt file containing paper to repair
import sys
import re
import string
from pprint import pprint
# Build a dictionary of acronym substitutions. Any combination of capital
# letters in parentheses will trigger an acronym pattern match search
acronym_def = re.compile(r'\([A-Z]+\)')
# Characters removed from a token before it is compared with a definition:
# everything except word characters, whitespace and parentheses.
_token_noise = re.compile(r'[^\w\s\(\)]')
# Whitespace left dangling in front of a period once a definition is removed.
_space_before_period = re.compile(r'\s\.')


def _find_acronyms(lines):
    """Return a dict mapping each acronym found in *lines* to its phrase.

    A definition is a whitespace-delimited token that, once punctuation is
    stripped, equals ``(ABC)``. The phrase is searched for in the N tokens
    preceding an N-letter acronym: it starts at the first of those tokens
    whose initial letter matches the acronym's first letter
    (case-insensitively) and runs up to the definition. A later definition
    of the same acronym overwrites an earlier one.
    """
    acronym_dict = {}
    for line in lines:
        definitions = acronym_def.findall(line)
        words = line.split()
        for item in definitions:
            stripped_acronym = item[1:-1]
            width = len(stripped_acronym)
            first_letter = stripped_acronym[0].lower()
            # An N-letter acronym needs N tokens in front of it, so the
            # scan (and the in-place token clean-up) starts at index N.
            for i in range(width, len(words)):
                words[i] = _token_noise.sub('', words[i])
                if words[i] != item:
                    continue
                j = i - width
                # Test the bound first and slice instead of indexing: a
                # token cleaned down to "" must be skipped, not crash.
                while j < i and words[j][:1].lower() != first_letter:
                    j += 1
                # Drop emptied tokens so the phrase has no doubled spaces.
                phrase = " ".join(word for word in words[j:i] if word)
                if phrase:
                    acronym_dict[stripped_acronym] = phrase
    return acronym_dict


def _compile_substitutions(acronym_dict):
    """Return ``(pattern, phrase)`` pairs, longest acronym first.

    Each pattern matches the acronym only as a whole word, optionally
    followed by a lowercase plural ``s`` (which is left in place), so "WHO"
    and "WHOs" are expanded but "WHOLE" is not.
    """
    return [
        (re.compile(r'\b' + re.escape(key) + r'(?=s?\b)'), acronym_dict[key])
        for key in sorted(acronym_dict, key=len, reverse=True)
    ]


def _expand_line(line, substitutions):
    """Return *line* with definitions removed and acronyms spelled out.

    When there are no substitutions the line is returned untouched.
    Otherwise every ``(ABC)`` group is removed, whitespace directly before a
    period is dropped, and the substitutions are applied in order.
    """
    if not substitutions:
        return line
    line = acronym_def.sub('', line)
    line = _space_before_period.sub('.', line)
    for pattern, phrase in substitutions:
        # A callable replacement keeps backslashes in the phrase literal.
        line = pattern.sub(lambda _match, text=phrase: text, line)
    return line


def _armed_path(filename):
    """Return the output path: *filename* minus a ``.txt`` suffix, plus ``-ARMED.txt``."""
    if filename.lower().endswith('.txt'):
        filename = filename[:-4]
    return filename + "-ARMED.txt"


def main(argv=None):
    """Build the acronym dictionary for a text file and write the expanded copy.

    argv: list of command-line arguments without the program name; defaults
        to ``sys.argv[1:]``. The first entry is the path of the UTF-8 text
        file to repair; any further entries are ignored.

    Prints the acronym dictionary and the name of the file written. Exits
    with a usage message when no path is given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("usage: arm.py <path to txt file>")
    filename = args[0]

    with open(filename, 'r', encoding='UTF8') as source:
        acronym_dict = _find_acronyms(source)
    print("\nAcronym Dictionary: ")
    pprint(acronym_dict)

    # Now, write a new file with all of the acronym substitutions in place
    savename = _armed_path(filename)
    substitutions = _compile_substitutions(acronym_dict)
    # The output is written as UTF-8 as well, so any character read from the
    # input can be written back regardless of the platform default encoding.
    with open(filename, 'r', encoding='UTF8') as source, \
            open(savename, 'w', encoding='UTF8') as target:
        for line in source:
            target.write(_expand_line(line, substitutions))
    print("\n\nWrote \"" + savename + "\"")


if __name__ == "__main__":
    main()