-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathtwitter-mentors.py
87 lines (68 loc) · 3.78 KB
/
twitter-mentors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from twitter import *
import io, json, re, string, yaml
# Read the configuration file. For the format, see README.
# yaml.safe_load is used instead of yaml.load: it never constructs arbitrary
# Python objects from the file, and recent PyYAML releases reject a
# yaml.load() call that does not pass an explicit Loader.
with open("twitter-config.yml", "r") as configfile:
    config = yaml.safe_load(configfile)
def escape_text_for_table(text):
    """Return *text* made safe for a single cell of a GitHub Pages table.

    Pipe characters are backslash-escaped and newlines are replaced by a
    space, so the text cannot open a new column or a new row.
    """
    # The str.replace method is used rather than the string.replace() module
    # function, which is deprecated in Python 2 and removed in Python 3.
    # First, escape the pipe characters. They conflict with table pipes on
    # GH pages.
    text = text.replace('|', '\\|')
    # Remove new lines from the text. They cause a new row to appear in the
    # GH pages table.
    text = text.replace('\n', ' ')
    return text
# helper function of make_twitter_link_clickable
def markdown_link(match):
    """Return a markdown link whose label and target are the matched URL.

    Intended as the replacement callback of ``re.sub``.

    match -- regex match object; its first capture group is used as the
             link. When the pattern has no capture group, or the group did
             not take part in the match, the whole matched text is used.
    """
    groups = match.groups()
    if groups and groups[0] is not None:
        link = groups[0]
    else:
        # No usable capture group: fall back to the full match instead of
        # raising IndexError or rendering "[None](None)".
        link = match.group(0)
    return '[{0}]({0})'.format(link)
# The search results do not embed the tweet itself; the tweet text carries a
# t.co link (normally at its end). Make the link clickable so visitors can go
# to the specific tweet to reply.
def make_twitter_link_clickable(text):
    """Return *text* with every t.co short link turned into a markdown link.

    Each link is matched on its own (scheme, host and the alphanumeric short
    code), so several links in one text, or words following a link, are not
    merged into a single bogus URL.
    """
    return re.sub(r'(https://t\.co/[A-Za-z0-9]+)', markdown_link, text)
# NOTE: the text handling below (encode every field to UTF-8 byte strings,
# decode once right before writing) follows this script's Python 2 conventions.
t = Twitter(
    auth=OAuth(config['access_token'], config['access_token_secret'],
               config['consumer_key'], config['consumer_secret']))

# since quotes are normal tweets with Tweet URL at end, searching API for the tweet link
SEARCH_QUERY = "https://twitter.com/sehurlburt/status/889004724669661184"
# number of tweets requested per search call
PAGE_SIZE = 100

result = t.search.tweets(q=SEARCH_QUERY, count=PAGE_SIZE)
# result is a dictionary with 'search_metadata' and 'statuses' keys

# accepted tweets, keyed by tweet id
tweets = {}

# a bit of markdown for README page in /docs
output = "This page contains a list of people (along with their tweets) who are willing to help/mentor other programmers. I am working on a searchable index. For now, just do a Ctrl/Cmd + F and see if you can find the tech you want help with."
output += "\n\nIf you notice something wrong or want to be removed, open a GitHub issue or tweet me at [@real_ishan](https://twitter.com/real_ishan)"
output += "\n\nThanks to [Stephanie Hurlburt](https://twitter.com/sehurlburt/) who [asked people to help](https://twitter.com/sehurlburt/status/889004724669661184)!\n\n----"
output += "\n\n|User|Profile Description|Tweet|"
output += "\n" + "|----|----|----|"

if result['statuses']:
    while True:
        statuses = result['statuses']
        # process results and output
        for status in statuses:
            text = status['text']
            # Two things being filtered here:
            # - Retweets aren't from mentors (RT at beginning). Only the
            #   prefix is tested, so a tweet that merely contains "RT "
            #   somewhere (e.g. after "SMART") is not dropped.
            # - Most of the tweets of ~45 characters or fewer aren't about
            #   mentorship
            if text.startswith('RT ') or len(text) <= 45:
                continue

            user = status['user']
            tweet = {
                'text': text.encode('utf-8'),
                'user': {
                    'id': user['id'],
                    'name': user['name'].encode('utf-8'),
                    'profile': 'https://twitter.com/' + user['screen_name'].encode('utf-8'),
                    # the profile description may be null; treat it as empty
                    'description': (user['description'] or u'').encode('utf-8'),
                },
            }
            tweets[status['id']] = tweet

            # add username + profile link (name is escaped too: a pipe in a
            # display name would otherwise split the table cell)
            output += "\n" + "[" + escape_text_for_table(tweet['user']['name']) + "](" + tweet['user']['profile'] + ")" + "|"
            # add the description
            output += escape_text_for_table(tweet['user']['description']) + "|"
            # add tweet
            output += make_twitter_link_clickable(escape_text_for_table(tweet['text'])) + "|"

        # no more crawling if results were fewer than a full page in last call
        if len(statuses) < PAGE_SIZE:
            break

        # Continue with everything older than the oldest tweet of this page.
        # The oldest id is taken over ALL returned statuses (not only the
        # accepted ones) so the cursor always advances, and 1 is subtracted
        # because max_id is inclusive and would return that tweet again.
        last_tweet_id = min(status['id'] for status in statuses)
        result = t.search.tweets(q=SEARCH_QUERY, count=PAGE_SIZE, max_id=last_tweet_id - 1)
else:
    print("Incorrect result from Twitter")

# output is a UTF-8 byte string; io.open in text mode needs unicode
with io.open("docs/_includes/twitter_table.md", "w", encoding="utf-8") as outfile:
    outfile.write(output.decode('utf-8'))