Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avatars: Fetch avatars for websites with taken usernames #42

Merged
merged 2 commits into from
Dec 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ before_install:
- cat test-requirements.txt >> requirements.txt

script:
- python -m pytest -s
- python -m pytest -s -v
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ termcolor==1.1.0
Werkzeug==0.11.3
wheel==0.24.0
pyyaml==3.12
beautifulsoup4==4.6.0
parameterized==0.6.1
3 changes: 3 additions & 0 deletions tests/test_data.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
---
facebook:
avatar_usernames:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need different usernames for Facebook?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As noted in the PR description, profiles on Facebook can be “hidden”, therefore having no avatar to display. These are public profile links.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

- "gyanlakhwani"
- "brianchesky"
taken_usernames:
- "manu.chroma"
- "vijeth.kv"
Expand Down
56 changes: 56 additions & 0 deletions tests/test_get_avatar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import unittest
import json
import requests as r
import yaml
import pytest
from parameterized import parameterized
import logging
import username_api
import os.path

# Fixtures shared by every test in this module. The context managers close
# the file handles that a bare open() passed straight to yaml would leak, and
# safe_load covers the plain scalars/lists/mappings these files are expected
# to hold without relying on the Loader-less yaml.load() form.
with open(os.path.join('tests', 'test_data.yml')) as test_data_file:
    data = yaml.safe_load(test_data_file)
with open('websites.yml') as websites_file:
    websites = yaml.safe_load(websites_file)


def load_test_cases(type):
    """Build the ``(website, username)`` pairs used to parameterize the tests.

    ``type`` selects which usernames are taken from the module-level ``data``
    mapping: ``'with_avatar'`` uses a website's ``avatar_usernames`` when that
    key is present and its ``taken_usernames`` otherwise; any other value
    uses ``available_usernames``. ``behance`` always contributes a single
    ``(website, None)`` pair, which the tests treat as "skip".
    """
    cases = []
    for website, entry in data.items():
        if website == 'behance':
            users = [None]
        elif type != 'with_avatar':
            users = entry['available_usernames']
        elif 'avatar_usernames' in entry:
            users = entry['avatar_usernames']
        else:
            users = entry['taken_usernames']
        cases += [(website, user) for user in users]

    return cases


def custom_name_func(testcase_func, param_num, param):
    """Name a generated test ``<function name>_<safe form of its arguments>``.

    Passed to ``parameterized.expand`` as ``testcase_func_name``.
    ``param_num`` is accepted to match that callback signature but is not
    used.
    """
    joined_args = '_'.join(map(str, param.args))
    suffix = parameterized.to_safe_name(joined_args)
    return '{}_{}'.format(testcase_func.__name__, suffix)


class TestGet_avatar(object):
    """Checks the ``avatar`` field returned by ``username_api.check_username``."""

    @parameterized.expand(load_test_cases('with_avatar'),
                          testcase_func_name=custom_name_func)
    def test_with_avatar(self, website, user):
        # A missing user marks a website without avatar support.
        if not user:
            pytest.skip("website not supported")
        result = username_api.check_username(website, user)
        response = r.get(result['avatar'])
        # The avatar link must serve an image, or a generic binary payload.
        content_type = response.headers.get('content-type', '')
        assert (content_type.startswith('image/') or
                content_type == 'application/octet-stream')

    @parameterized.expand(load_test_cases('without_avatar'),
                          testcase_func_name=custom_name_func)
    def test_without_avatar(self, website, user):
        # A missing user marks a website without avatar support.
        if not user:
            pytest.skip("website not supported")
        avatar = username_api.check_username(website, user)['avatar']
        assert avatar is None
14 changes: 7 additions & 7 deletions tests/test_username_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import pytest
import logging
import username_api
import os.path

data = yaml.load(open('tests/test_data.yml'))
data = yaml.load(open(os.path.join('tests', 'test_data.yml')))
websites = yaml.load(open('websites.yml'))

invalid_username = '$very%long{invalid}user(name)'
Expand All @@ -28,10 +29,9 @@ def get_expected_response(website, user, status):
return {
'possible': True,
'status': status,
'url': websites['urls'].get(website, 'https://{w}.com/{u}').format(
w=website,
u=user
)
'url': username_api.get_profile_url(website, user),
'avatar': username_api.get_avatar(website, user) if status == 200
else None
}

class TestUsernameApi(object):
Expand Down Expand Up @@ -337,7 +337,7 @@ def test_pinterest_format_checking(self):
json_resp = json.loads(resp.get_data().decode())
assert {
'possible': False,
'url': 'https://in.pinterest.com/{}/'.format(invalid_username)
'url': 'https://in.pinterest.com/{}'.format(invalid_username)
} == json_resp

def test_instagram_format_checking(self):
Expand All @@ -364,5 +364,5 @@ def test_facebook_format_checking(self):
json_resp = json.loads(resp.get_data().decode())
assert {
'possible': False,
'url': 'https://mbasic.facebook.com/{}/'.format(invalid_username)
'url': 'https://mbasic.facebook.com/{}'.format(invalid_username)
} == json_resp
55 changes: 53 additions & 2 deletions username_api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from flask import Flask, jsonify
from flask.ext.cors import CORS, cross_origin
import requests as r
from bs4 import BeautifulSoup

import os
import sys
import re
import yaml
Expand All @@ -12,12 +14,57 @@

# Per-website configuration (URL templates, username patterns, avatar rules).
# The context manager closes the handle that a bare open() would leak, and
# safe_load avoids relying on the Loader-less yaml.load() form.
with open('websites.yml') as patterns_file:
    patterns = yaml.safe_load(patterns_file)

def check_username(website, username):
url = patterns['urls'].get(website, 'https://{w}.com/{u}').format(
def get_profile_url(website, username):
    """Return the profile URL of ``username`` on ``website``.

    Uses the website's template from the ``urls`` section of ``patterns``
    and falls back to ``https://<website>.com/<username>`` when the website
    has no template of its own.
    """
    template = patterns['urls'].get(website, 'https://{w}.com/{u}')
    return template.format(w=website, u=username)

def get_avatar(website, username):
    """Return the avatar image URL of ``username`` on ``website``, or None.

    The lookup strategy comes from the website's entry in the ``avatar``
    section of ``patterns``:

    * missing or falsy entry: avatars are not supported, return None;
    * ``'opengraph'``: use the ``og:image`` meta tag of the profile page;
    * mapping with ``key``: search the page source for ``"<key>": "<link>"``;
    * mapping with ``url``: request that URL (``{u}`` is the username)
      instead of the profile page; without a ``key`` the URL itself is
      returned when it serves an image.

    None is also returned when the request answers 404 or nothing is found.
    """
    # Imported locally so the block brings its own dependency into scope;
    # the fallback covers Python 2, where urljoin lives in ``urlparse``.
    try:
        from urllib.parse import urljoin
    except ImportError:
        from urlparse import urljoin

    # .get(): a website without an avatar entry means "unsupported" rather
    # than a KeyError.
    data = patterns['avatar'].get(website)
    if not data:
        return None

    # Only mappings carry options. On a plain string such as 'opengraph',
    # ``'url' in data`` would be a substring test, not a key lookup.
    options = data if isinstance(data, dict) else {}

    url = get_profile_url(website, username)
    if 'url' in options:
        url = options['url'].format(u=username)

    response = r.get(url)
    if response.status_code == 404:
        return None

    if data == 'opengraph':
        # Look in metadata for image.
        soup = BeautifulSoup(response.text, 'html.parser')
        result = [item.attrs.get('content') for item in soup('meta')
                  if item.has_attr('property') and
                  item.attrs['property'].lower() == 'og:image']
        # Both checks are needed: the tag may be absent, and (per the
        # original review discussion) some website, possibly Facebook,
        # returned an empty string as the og:image content.
        if not result or not result[0]:
            return None
        result = result[0]
    elif 'key' in options:
        # Searches for "`key`": "`link`"
        quote = '[\'"]'
        regex = re.compile(quote + re.escape(options['key']) + quote +
                           r':(\s)?' + quote + r'(?P<link>[^\s]+)' + quote)
        match = regex.search(response.text)
        if not match:
            return None
        result = match.group('link')
    elif response.headers.get('content-type', '').startswith('image/'):
        # Direct image endpoint (the Twitter entry). Per the original review
        # discussion it redirects to the homepage when the user does not
        # exist, which is why the content type is checked.
        return url
    else:
        return None

    # Fix relative links. Resolving against the URL that was actually served
    # avoids the doubled slash of plain concatenation and also handles
    # protocol-relative ('//host/path') links and subdomain-style profile
    # URLs.
    if result.startswith('/'):
        result = urljoin(response.url, result)
    return result

def check_username(website, username):
url = get_profile_url(website, username)

possible = check_format(website, username)

if not possible:
Expand All @@ -34,6 +81,8 @@ def check_username(website, username):
return {
'status': code,
'url': url,
'avatar': get_avatar(website, username) if code == 200
else None,
'possible': possible,
}

Expand All @@ -54,6 +103,7 @@ def check_username(website, username):
return {
'status': code,
'url': url,
'avatar': get_avatar(website, username),
'possible': possible,
'profile': profile,
}
Expand All @@ -62,6 +112,7 @@ def check_username(website, username):
return {
'status': r.get(url).status_code,
'url': url,
'avatar': get_avatar(website, username),
'possible': possible,
}

Expand Down
16 changes: 14 additions & 2 deletions websites.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
---
urls:
behance: https://{w}.net/{u}
pinterest: https://in.{w}.com/{u}/
pinterest: https://in.{w}.com/{u}
tumblr: https://{u}.{w}.com
facebook: https://mbasic.{w}.com/{u}/
facebook: https://mbasic.{w}.com/{u}
username_patterns:
pinterest:
characters: a-zA-Z0-9_
Expand Down Expand Up @@ -46,3 +46,15 @@ username_patterns:
invalid_patterns:
- "\\.(com|net)"
- "(\\.)\\1{1,}" # consecutive '.' not allowed
avatar:
behance: false
facebook: opengraph
github: opengraph
gitlab: opengraph
instagram: opengraph
pinterest:
key: image_xlarge_url
soundcloud: opengraph
tumblr: opengraph
twitter:
url: https://twitter.com/{u}/profile_image?size=original