-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeatures.py
86 lines (57 loc) · 1.99 KB
/
features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from cleaner import cleanTextData
def countProAntiRatio(text):
    """Return the share of cleaned words starting with 'pro-' or 'anti'.

    The text is tokenised by ``cleanTextData`` and the number of matching
    words is divided by the total number of cleaned words.

    Args:
        text: Raw text handed to ``cleanTextData``.

    Returns:
        The ratio of matching words, or 0.0 when cleaning yields no words.
    """
    words = cleanTextData(text)
    totalWords = len(words)
    # An empty word list would otherwise raise ZeroDivisionError below.
    if totalWords == 0:
        return 0.0
    # NOTE(review): 'pro-' requires the hyphen while 'anti' does not; if
    # cleanTextData strips punctuation the 'pro-' prefix can never match --
    # confirm against the cleaner module.
    matchedWords = sum(1 for word in words if word.startswith(('pro-', 'anti')))
    return matchedWords / totalWords
def countSovietRatio(text):
    """Return the share of cleaned words that contain the substring 'soviet'.

    The text is tokenised by ``cleanTextData`` and the number of matching
    words is divided by the total number of cleaned words.

    Args:
        text: Raw text handed to ``cleanTextData``.

    Returns:
        The ratio of matching words, or 0.0 when cleaning yields no words.
    """
    words = cleanTextData(text)
    totalWords = len(words)
    # An empty word list would otherwise raise ZeroDivisionError below.
    if totalWords == 0:
        return 0.0
    # The match is case-sensitive; presumably cleanTextData lower-cases the
    # words -- TODO confirm.
    matchedWords = sum(1 for word in words if 'soviet' in word)
    return matchedWords / totalWords
def countNaziTermRatio(text):
    """Return the share of whitespace-separated tokens found in the term list.

    The text is lower-cased and split on whitespace; each token is compared
    for exact equality against the lower-cased, stripped lines of
    ``data/words/nazi_term.txt``. Terms of two characters or fewer are ignored.

    Args:
        text: Raw text to analyse.

    Returns:
        Matching tokens divided by the total token count, or 0.0 when the
        text contains no tokens (the term file is not read in that case).

    Raises:
        OSError: If the term file cannot be opened.
    """
    textWords = text.lower().split()
    # Without tokens the ratio is undefined; return 0.0 instead of letting
    # the division below raise ZeroDivisionError.
    if not textWords:
        return 0.0

    # Prepare the term list. A set gives constant-time membership tests, and
    # the length filter also drops blank lines (empty after stripping).
    # NOTE(review): the file is opened with the platform default encoding.
    with open('data/words/nazi_term.txt') as f:
        naziWords = {line.lower().strip() for line in f}
    naziWords = {w for w in naziWords if len(w) > 2}

    wordsFound = sum(1 for w in textWords if w in naziWords)
    return wordsFound / len(textWords)
def countCustomMarkersRatio(text):
    """Return marker occurrences in the text relative to its word count.

    Each line of ``data/words/manip_markers.txt`` is lower-cased and stripped,
    then counted as a substring of the lower-cased text with ``str.count``
    (non-overlapping occurrences, so a marker also matches inside longer
    words). Markers of two characters or fewer are ignored. The total is
    divided by the number of whitespace-separated tokens in the text.

    Args:
        text: Raw text to analyse.

    Returns:
        Total marker occurrences divided by the token count, or 0.0 when the
        text contains no tokens (the marker file is not read in that case).

    Raises:
        OSError: If the marker file cannot be opened.
    """
    normalText = text.lower()
    totalWords = len(normalText.split())
    # Without tokens the ratio is undefined; return 0.0 instead of letting
    # the division below raise ZeroDivisionError.
    if totalWords == 0:
        return 0.0

    # Prepare the custom marker list.
    # NOTE(review): the file is opened with the platform default encoding.
    with open('data/words/manip_markers.txt') as f:
        customMarkers = [line.lower().strip() for line in f]

    # The length filter also skips blank lines, for which str.count('')
    # would otherwise report len(text) + 1 matches.
    wordsFound = sum(normalText.count(w) for w in customMarkers if len(w) > 2)
    return wordsFound / totalWords
def countMilitaryTermsRatio(text):
    """Return military-term occurrences in the text relative to its word count.

    Each line of ``data/words/military_terms.txt`` is lower-cased and
    stripped, then counted as a substring of the lower-cased text with
    ``str.count`` (non-overlapping occurrences, so a term also matches inside
    longer words). Terms of two characters or fewer are ignored. The total is
    divided by the number of whitespace-separated tokens in the text.

    Args:
        text: Raw text to analyse.

    Returns:
        Total term occurrences divided by the token count, or 0.0 when the
        text contains no tokens (the term file is not read in that case).

    Raises:
        OSError: If the term file cannot be opened.
    """
    normalText = text.lower()
    totalWords = len(normalText.split())
    # Without tokens the ratio is undefined; return 0.0 instead of letting
    # the division below raise ZeroDivisionError.
    if totalWords == 0:
        return 0.0

    # Prepare the military terms list.
    # NOTE(review): the file is opened with the platform default encoding.
    with open('data/words/military_terms.txt') as f:
        militaryTerms = [line.lower().strip() for line in f]

    # The length filter also skips blank lines, for which str.count('')
    # would otherwise report len(text) + 1 matches.
    wordsFound = sum(normalText.count(w) for w in militaryTerms if len(w) > 2)
    return wordsFound / totalWords
def countPutinRatio(text):
    """Return the share of cleaned words that contain the substring 'putin'.

    The text is tokenised by ``cleanTextData`` and the number of matching
    words is divided by the total number of cleaned words.

    Args:
        text: Raw text handed to ``cleanTextData``.

    Returns:
        The ratio of matching words, or 0.0 when cleaning yields no words.
    """
    words = cleanTextData(text)
    totalWords = len(words)
    # An empty word list would otherwise raise ZeroDivisionError below.
    if totalWords == 0:
        return 0.0
    # The match is case-sensitive; presumably cleanTextData lower-cases the
    # words -- TODO confirm.
    matchedWords = sum(1 for word in words if 'putin' in word)
    return matchedWords / totalWords