-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess_html.py
60 lines (48 loc) · 2.05 KB
/
process_html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/python3
"""Parses Arma 3 launcher mod list html
"""
try:
from BeautifulSoup import BeautifulSoup
except ImportError:
from bs4 import BeautifulSoup
import re
import sys
import argparse
# Regex locating a mod ID inside link text: group 1 is the literal "?id="
# marker, group 2 is the run of decimal digits that follows it.
# Written as a raw string so that "\?" reaches the regex engine as an escaped
# question mark instead of being an invalid string escape (which newer Python
# versions warn about at compile time).
modIDRe = r"(\?id=)([0-9]+)"
def loadMods(file, names=False, at=True):
    """
    Load the mods listed in an Arma 3 launcher mod list HTML file.

    Every ``<tr data-type="ModContainer">`` row is inspected. A row yields one
    entry when it contains an ``<a data-type="Link">`` whose text matches
    ``modIDRe`` and a ``<td data-type="DisplayName">`` cell; rows lacking any
    of these are skipped silently. Nothing is printed here; output is left to
    the caller.

    :param file: Path to HTML file to load
    :param names: Unused; accepted only so existing callers keep working
    :param at: Unused; accepted only so existing callers keep working
    :return: list of dicts, one per mod, with the keys ``"name"`` (text of the
        display-name cell, not stripped) and ``"ID"`` (the matched digits, as
        a string)
    """
    # Hand Beautiful Soup the raw bytes so it can detect the document's own
    # encoding rather than relying on the platform's default text encoding.
    with open(file, "rb") as f:
        parsed_html = BeautifulSoup(f, features="html.parser")

    # html.parser does not synthesise a <body>; fall back to the whole
    # document so a body-less file gives a (possibly empty) list instead of
    # an AttributeError.
    root = parsed_html.body if parsed_html.body is not None else parsed_html

    modlist = []
    for mod in root.find_all("tr", attrs={"data-type": "ModContainer"}):
        link = mod.find("a", attrs={"data-type": "Link"})
        nameCell = mod.find("td", attrs={"data-type": "DisplayName"})
        if link is None or nameCell is None:
            # Incomplete row: skip it, as before, but without a bare except
            # that would also hide unrelated errors.
            continue
        # The ID is taken from the link's visible text, not its href.
        found = re.search(modIDRe, link.text)
        if found is None:
            continue
        modlist.append({"name": nameCell.text, "ID": found.group(2)})
    return modlist
def parseArgs(argv=None):
    """Parse the command line of the mod list processor.

    ``-n/--names`` and ``-a/--at`` are plain on/off switches. They used to be
    declared with ``type=bool, nargs="?"``, which made them swallow the next
    token (so ``-n mods.html`` left no input file) and treated any non-empty
    value, including ``"False"``, as true.

    :param argv: argument list to parse; ``None`` means ``sys.argv[1:]``
    :return: namespace with ``fileList`` (str), ``names`` (bool), ``at`` (bool)
    """
    argParser = argparse.ArgumentParser(description='Process Arma 3 modlists')
    # Required positional: a missing file is reported as a usage error instead
    # of surfacing later as a TypeError from open(None).
    argParser.add_argument("fileList", type=str,
                           help='HTML file to be processed')
    argParser.add_argument("-n", "--names", action="store_true",
                           help='Output names in addition to ids')
    argParser.add_argument("-a", "--at", action="store_true",
                           help='prepend "@" to names')
    return argParser.parse_args(argv)


def formatMod(mod, names, at):
    """Build the output line for one mod entry.

    The line is the ID, a single space, then (only when ``names`` is true)
    the name, prefixed with "@" when ``at`` is also true. In ID-only mode the
    separating space is still emitted, matching the script's existing output.

    :param mod: dict with the keys ``"ID"`` and ``"name"``
    :param names: include the mod name after the ID
    :param at: prepend "@" to the name (has no effect unless ``names`` is set)
    :return: the formatted line, without a trailing newline
    """
    prefix = "@" if at and names else ""
    label = mod["name"] if names else ""
    return f'{mod["ID"]} {prefix}{label}'


def main(argv=None):
    """Print one line per mod found in the HTML file named on the command line.

    :param argv: argument list to parse; ``None`` means ``sys.argv[1:]``
    """
    args = parseArgs(argv)
    for m in loadMods(args.fileList, args.names, args.at):
        print(formatMod(m, args.names, args.at))


if __name__ == "__main__":
    main()