-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbar.py
52 lines (45 loc) · 1.31 KB
/
bar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the whole dataset into memory once. The path is relative, so it is
# resolved against the current working directory of the process.
data = pd.read_csv(filepath_or_buffer='data/dataset.csv')
# Columns of the dataset that get plotted, in plotting order.
features = [
    'sentence_length',
    'compound',
    'neg',
    'neu',
    'pos',
    'punctuation_count',
    'contain_profanity',
    'num_profanity',
]

# Histogram bin count per feature: 10 everywhere, except sentence_length,
# which gets a much finer 296-bin histogram.
bins = dict.fromkeys(features, 10)
bins['sentence_length'] = 296

# Right-hand x-axis limit of each feature's histogram (the left-hand limit is
# fixed at 0 where the limit is applied). Most features are capped at 1; the
# three exceptions are overridden explicitly.
xlimit = dict.fromkeys(features, 1)
xlimit.update({
    'sentence_length': 200,
    'punctuation_count': 0.5,
    'num_profanity': 3.5,
})
# Selecting the feature columns up front fails fast with a KeyError if the
# CSV is missing any of the expected columns.
X = data[features]

# savefig() does not create missing parent directories, so make sure both
# output folders exist before the first figure is written.
count_dir = os.path.join('data_count', 'count')
annotation_dir = os.path.join('data_count', 'output')
for out_dir in (count_dir, annotation_dir):
    os.makedirs(out_dir, exist_ok=True)

# Iterating a DataFrame yields its column labels, i.e. the feature names.
for feat in X:
    # feature vs count: histogram of the feature's values across all rows.
    fig, ax = plt.subplots()
    ax.hist(data[feat], bins=bins[feat], rwidth=0.9)
    ax.set_title("{} vs. # of tweets".format(feat))
    ax.set_xlabel("{}".format(feat))
    ax.set_ylabel("# of Tweets")
    # NOTE(review): the lower limit is hard-coded to 0, so any negative
    # values (possibly the case for 'compound' -- confirm against the data)
    # are cropped out of the saved histogram.
    ax.set_xlim(0, xlimit[feat])
    fig.savefig(os.path.join(count_dir, '{}_count.png'.format(feat)),
                format='png')
    # Close explicitly so figures do not accumulate across iterations.
    plt.close(fig)

    # feature vs annotation: scatter of the feature against the label column.
    fig, ax = plt.subplots()
    ax.scatter(data[feat], data["annotation"], s=2)
    ax.set_title("{} vs. Troll Tweet".format(feat))
    ax.set_xlabel("{}".format(feat))
    ax.set_ylabel("Troll Label")
    # Pad the y-axis around the two tick positions (0 and 1) so the points
    # are not drawn on the plot border.
    ax.set_ylim([-0.7, 1.7])
    ax.set_yticks([0, 1])
    fig.savefig(
        os.path.join(annotation_dir, '{}_vs_annotation.png'.format(feat)),
        format='png')
    plt.close(fig)