-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtweet_snagger.py
68 lines (55 loc) · 2.13 KB
/
tweet_snagger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import snscrape.modules.twitter as twitter
from intent_classifier import IntentClassifier
class TweetSnagger:
    """Snags sports tweets from Twitter.

    Builds a Twitter search query from topics, an optional intent phrase and
    optional authors, runs it through snscrape's ``TwitterSearchScraper`` and
    returns the matching tweets as plain dictionaries.
    """

    # Substrings looked for in a tweet's ``source`` field. A tweet whose
    # source mentions one of these is treated as posted from a regular
    # client rather than from an automation tool. This is a heuristic only.
    _HUMAN_SOURCE_MARKERS = ('Web', 'iPhone', 'Android')

    def __init__(self):
        # Not used by any method of this class; kept because it is a public
        # attribute that callers may rely on.
        self.intent_classifier = IntentClassifier()

    def snag_tweets(self, topics, intent, authors=None, num_tweets=5,
                    replies=False, retweets=False, min_likes=0):
        """Return up to ``num_tweets`` tweets matching the given parameters.

        Args:
            topics: Search terms; all of them must match (joined with AND).
            intent: Optional extra phrase. When truthy it is added to the
                topics as a parenthesised group.
            authors: Optional usernames. When given, only tweets from any
                one of these accounts are searched. ``None`` (the default)
                or an empty list means no author restriction.
            num_tweets: Maximum number of tweets to return. Zero or a
                negative value yields an empty list without scraping.
            replies: Include replies when True.
            retweets: Include retweets when True.
            min_likes: Minimum like count a tweet needs to be returned.
                ``0`` disables the filter.

        Returns:
            A list of dictionaries in the format::

                {
                    'url': str,
                    'user': str,
                    'content': str
                }
        """
        # Copy so the caller's list is never modified.
        terms = list(topics or [])
        if intent:
            terms.append(f'({intent})')
        query = self._make_query(terms, authors, replies, retweets)

        print("Making Twitter query:", query)
        tweets = []
        if num_tweets <= 0:
            return tweets

        for tweet in twitter.TwitterSearchScraper(query).get_items():
            if not self._verify_source(tweet.source):
                continue
            # NOTE(review): assumes snscrape exposes the like count as
            # ``likeCount``; a missing or None value is treated as 0 likes.
            likes = getattr(tweet, 'likeCount', None) or 0
            if min_likes and likes < min_likes:
                continue
            tweets.append({
                'url': tweet.url,
                'user': tweet.user.username,
                'content': tweet.content
            })
            # Stop right after the last needed tweet so the scraper is not
            # asked for one more item than necessary.
            if len(tweets) >= num_tweets:
                break
        return tweets

    def _verify_source(self, source):
        """Return True if the tweet source looks like a regular client.

        Heuristic: the source string must mention 'Web', 'iPhone' or
        'Android'. A missing (``None``) or empty source returns False
        instead of raising ``TypeError``.
        """
        if not source:
            return False
        return any(marker in source for marker in self._HUMAN_SOURCE_MARKERS)

    def _make_query(self, topics, authors, replies=False, retweets=False):
        """Format a Twitter search query string from the given input.

        Args:
            topics: Search terms, joined with AND inside one group. An
                empty value adds no group at all.
            authors: Usernames, joined with OR inside one group. An empty
                value or ``None`` adds no group.
            replies: When False, ``-filter:replies`` is appended.
            retweets: When False, ``-filter:retweets`` is appended.

        Returns:
            The query string; its parts are separated by single spaces and
            it has no trailing whitespace.
        """
        parts = []
        if authors:
            # A tweet has exactly one author, so several authors must be
            # alternatives (OR); AND-ing them could never match anything.
            parts.append(
                '(' + ' OR '.join(f'from:{author}' for author in authors) + ')'
            )
        if topics:
            parts.append('(' + ' AND '.join(f'{topic}' for topic in topics) + ')')
        parts.append('(lang:en)')
        if not replies:
            parts.append('-filter:replies')
        if not retweets:
            parts.append('-filter:retweets')
        return ' '.join(parts)