-
Notifications
You must be signed in to change notification settings - Fork 36
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Avatars: Fetch avatars for websites with taken usernames #42
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,3 +10,5 @@ termcolor==1.1.0 | |
Werkzeug==0.11.3 | ||
wheel==0.24.0 | ||
pyyaml==3.12 | ||
beautifulsoup4==4.6.0 | ||
parameterized==0.6.1 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,8 @@ | ||
--- | ||
facebook: | ||
avatar_usernames: | ||
- "gyanlakhwani" | ||
- "brianchesky" | ||
taken_usernames: | ||
- "manu.chroma" | ||
- "vijeth.kv" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import unittest | ||
import json | ||
import requests as r | ||
import yaml | ||
import pytest | ||
from parameterized import parameterized | ||
import logging | ||
import username_api | ||
import os.path | ||
|
||
# Load the test fixtures and the site patterns once at import time.
# NOTE(review): switched from yaml.load to yaml.safe_load -- yaml.load without
# an explicit Loader can construct arbitrary Python objects and is rejected by
# newer PyYAML releases. This assumes both files contain only plain
# mappings/lists/strings; confirm for websites.yml.
# The files are opened via context managers so the handles are closed
# deterministically instead of being leaked.
with open(os.path.join('tests', 'test_data.yml')) as test_data_file:
    data = yaml.safe_load(test_data_file)
with open('websites.yml') as websites_file:
    websites = yaml.safe_load(websites_file)
|
||
|
||
def load_test_cases(type):
    """Build the list of ``(website, username)`` pairs used as test parameters.

    For ``type == 'with_avatar'`` each website contributes its
    ``avatar_usernames`` when that key is present and its ``taken_usernames``
    otherwise. For any other ``type`` the website's ``available_usernames``
    are used. The ``behance`` entry always contributes a single
    ``(website, None)`` pair, which the tests treat as "skip".
    """
    cases = []
    for site in data:
        if site == 'behance':
            names = [None]
        elif type != 'with_avatar':
            names = data[site]['available_usernames']
        else:
            entry = data[site]
            # Prefer the dedicated avatar list; fall back to taken usernames.
            if 'avatar_usernames' in entry:
                names = entry['avatar_usernames']
            else:
                names = entry['taken_usernames']
        for name in names:
            cases.append((site, name))

    return cases
|
||
|
||
def custom_name_func(testcase_func, param_num, param):
    """Name a generated test ``<function name>_<safe form of its arguments>``.

    Passed to ``parameterized.expand`` as ``testcase_func_name``; the
    ``param_num`` argument is accepted but not used.
    """
    base_name = testcase_func.__name__
    joined_args = '_'.join(str(arg) for arg in param.args)
    safe_suffix = parameterized.to_safe_name(joined_args)
    return '%s_%s' % (base_name, safe_suffix)
|
||
|
||
class TestGet_avatar(object):
    """Checks of the ``avatar`` field returned by ``username_api.check_username``."""

    @parameterized.expand(load_test_cases('with_avatar'),
                          testcase_func_name=custom_name_func)
    def test_with_avatar(self, website, user):
        """The avatar link of an existing user must resolve to image data."""
        if not user:
            pytest.skip("website not supported")
        result = username_api.check_username(website, user)
        link = result['avatar']
        response = r.get(link)
        content_type = response.headers.get('content-type', '')
        # Some hosts serve images as a generic binary stream.
        is_image = content_type.startswith('image/')
        is_binary = content_type == 'application/octet-stream'
        assert is_image or is_binary

    @parameterized.expand(load_test_cases('without_avatar'),
                          testcase_func_name=custom_name_func)
    def test_without_avatar(self, website, user):
        """An available (non-existent) username must report no avatar."""
        if not user:
            pytest.skip("website not supported")
        result = username_api.check_username(website, user)
        assert result['avatar'] is None
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
from flask import Flask, jsonify | ||
from flask.ext.cors import CORS, cross_origin | ||
import requests as r | ||
from bs4 import BeautifulSoup | ||
|
||
import os | ||
import sys | ||
import re | ||
import yaml | ||
|
@@ -12,12 +14,57 @@ | |
|
||
# Site URL/avatar patterns, loaded once at import time.
# NOTE(review): yaml.safe_load replaces yaml.load (which can construct
# arbitrary objects and needs an explicit Loader on newer PyYAML); this assumes
# websites.yml holds only plain mappings/lists/strings -- confirm.
# The context manager closes the file handle instead of leaking it.
with open('websites.yml') as patterns_file:
    patterns = yaml.safe_load(patterns_file)
|
||
def check_username(website, username): | ||
url = patterns['urls'].get(website, 'https://{w}.com/{u}').format( | ||
def get_profile_url(website, username):
    """Return the profile URL of ``username`` on ``website``.

    Uses the site's template from ``patterns['urls']`` when one is
    configured, and ``https://<website>.com/<username>`` otherwise.
    """
    default_template = 'https://{w}.com/{u}'
    template = patterns['urls'].get(website, default_template)
    return template.format(w=website, u=username)
|
||
def get_avatar(website, username):
    """Return the avatar image URL of ``username`` on ``website``, or ``None``.

    The lookup strategy is taken from ``patterns['avatar'][website]``:

    * missing or falsy entry -- avatars are not supported, return ``None``;
    * the string ``'opengraph'`` -- fetch the profile page and use the first
      ``og:image`` meta tag;
    * a mapping with ``'key'`` -- search the response text for
      ``"<key>": "<link>"`` and use the link;
    * otherwise -- treat the fetched URL itself as the avatar, but only if
      the response is served with an ``image/*`` content type.

    A mapping may also carry ``'url'``, a template (formatted with
    ``u=username``) that is fetched instead of the profile page.
    Returns ``None`` when the page answers 404 or no avatar can be found.
    """
    # Imported locally so the module keeps working on both Python 2 and 3.
    try:
        from urllib.parse import urljoin
    except ImportError:  # Python 2
        from urlparse import urljoin

    # .get() so a website without an avatar entry yields None, not KeyError.
    data = patterns['avatar'].get(website)
    if not data:
        return None

    # Only mappings have 'url'/'key' entries; guarding on the type avoids
    # accidental substring matches when the entry is a plain string.
    is_mapping = isinstance(data, dict)

    url = get_profile_url(website, username)
    if is_mapping and 'url' in data:
        url = data['url'].format(u=username)

    response = r.get(url)
    if response.status_code == 404:
        return None

    if data == 'opengraph':
        # Look in metadata for image.
        soup = BeautifulSoup(response.text, 'html.parser')
        result = [item.attrs.get('content') for item in soup('meta')
                  if item.has_attr('property') and
                  item.attrs['property'].lower() == 'og:image']
        # Both checks are needed: the tag may be absent, or present with an
        # empty value (per review discussion, some site -- possibly
        # Facebook -- returned an empty string here).
        if not result or not result[0]:
            return None
        result = result[0]
    elif is_mapping and 'key' in data:
        # Searches for "`key`": "`link`"
        quote = '[\'"]'
        regex = re.compile(quote + re.escape(data['key']) + quote +
                           r':(\s)?' + quote + r'(?P<link>[^\s]+)' + quote)
        match = regex.search(response.text)
        if not match:
            return None
        result = match.group('link')
    elif response.headers.get('content-type', '').startswith('image/'):
        # Direct image endpoint (per review discussion this applies to
        # Twitter, which redirects to its homepage when the user does not
        # exist), so the URL is only trusted when an image came back.
        return url
    else:
        return None

    # Fix relative links. urljoin resolves both root-relative ("/a.png") and
    # protocol-relative ("//cdn.example/a.png") links against the site,
    # without producing a doubled slash.
    if result.startswith('/'):
        result = urljoin(get_profile_url(website, ''), result)
    return result
|
||
def check_username(website, username): | ||
url = get_profile_url(website, username) | ||
|
||
possible = check_format(website, username) | ||
|
||
if not possible: | ||
|
@@ -34,6 +81,8 @@ def check_username(website, username): | |
return { | ||
'status': code, | ||
'url': url, | ||
'avatar': get_avatar(website, username) if code == 200 | ||
else None, | ||
'possible': possible, | ||
} | ||
|
||
|
@@ -54,6 +103,7 @@ def check_username(website, username): | |
return { | ||
'status': code, | ||
'url': url, | ||
'avatar': get_avatar(website, username), | ||
'possible': possible, | ||
'profile': profile, | ||
} | ||
|
@@ -62,6 +112,7 @@ def check_username(website, username): | |
return { | ||
'status': r.get(url).status_code, | ||
'url': url, | ||
'avatar': get_avatar(website, username), | ||
'possible': possible, | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why do we need different usernames for facebook?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As noted in the PR description, profiles on Facebook can be “hidden”, therefore having no avatar to display. These are public profile links.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok