diff --git a/.travis.yml b/.travis.yml
index 7e95120..1b61218 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,4 +14,4 @@ before_install:
   - cat test-requirements.txt >> requirements.txt
 
 script:
-  - python -m pytest -s
+  - python -m pytest -s -v
diff --git a/requirements.txt b/requirements.txt
index e46e7c4..1ed3330 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,5 @@ termcolor==1.1.0
 Werkzeug==0.11.3
 wheel==0.24.0
 pyyaml==3.12
+beautifulsoup4==4.6.0
+parameterized==0.6.1
diff --git a/tests/test_data.yml b/tests/test_data.yml
index ae874cb..8abef28 100644
--- a/tests/test_data.yml
+++ b/tests/test_data.yml
@@ -1,5 +1,8 @@
 ---
 facebook:
+  avatar_usernames:
+    - "gyanlakhwani"
+    - "brianchesky"
   taken_usernames:
     - "manu.chroma"
     - "vijeth.kv"
diff --git a/tests/test_get_avatar.py b/tests/test_get_avatar.py
new file mode 100644
index 0000000..4684fe9
--- /dev/null
+++ b/tests/test_get_avatar.py
@@ -0,0 +1,64 @@
+import unittest
+import json
+import requests as r
+import yaml
+import pytest
+from parameterized import parameterized
+import logging
+import username_api
+import os.path
+
+data = yaml.load(open(os.path.join('tests', 'test_data.yml')))
+websites = yaml.load(open('websites.yml'))
+
+
+def load_test_cases(type):
+    """Build the (website, user) pairs that parameterize the tests.
+
+    For 'with_avatar', each website contributes its `avatar_usernames`
+    (or `taken_usernames` when that key is absent); any other value
+    contributes its `available_usernames`. behance always yields a
+    single (website, None) pair, which the tests skip.
+    """
+    res = []
+    for website in data:
+        if website == 'behance':
+            users = [None]
+        elif type == 'with_avatar':
+            if 'avatar_usernames' in data[website]:
+                users = data[website]['avatar_usernames']
+            else:
+                users = data[website]['taken_usernames']
+        else:
+            users = data[website]['available_usernames']
+        res.extend((website, user) for user in users)
+
+    return res
+
+
+def custom_name_func(testcase_func, param_num, param):
+    """Name a generated test `<function name>_<parameters joined by _>`."""
+    return '%s_%s' % (
+        testcase_func.__name__,
+        parameterized.to_safe_name('_'.join(str(x) for x in param.args)),
+    )
+
+
+class TestGet_avatar(object):
+
+    @parameterized.expand(load_test_cases('with_avatar'),
+                          testcase_func_name=custom_name_func)
+    def test_with_avatar(self, website, user):
+        if not user:
+            pytest.skip("website not supported")
+        link = username_api.check_username(website, user)['avatar']
+        response = r.get(link)
+        assert(response.headers.get('content-type', '').startswith('image/') or
+               response.headers.get('content-type') == 'application/octet-stream')
+
+    @parameterized.expand(load_test_cases('without_avatar'),
+                          testcase_func_name=custom_name_func)
+    def test_without_avatar(self, website, user):
+        if not user:
+            pytest.skip("website not supported")
+        assert username_api.check_username(website, user)['avatar'] is None
diff --git a/tests/test_username_api.py b/tests/test_username_api.py
index 483ddea..cddde60 100644
--- a/tests/test_username_api.py
+++ b/tests/test_username_api.py
@@ -6,8 +6,9 @@ import pytest
 import logging
 
 import username_api
+import os.path
 
-data = yaml.load(open('tests/test_data.yml'))
+data = yaml.load(open(os.path.join('tests', 'test_data.yml')))
 websites = yaml.load(open('websites.yml'))
 
 invalid_username = '$very%long{invalid}user(name)'
@@ -28,10 +29,9 @@ def get_expected_response(website, user, status):
     return {
         'possible': True,
         'status': status,
-        'url': websites['urls'].get(website, 'https://{w}.com/{u}').format(
-            w=website,
-            u=user
-        )
+        'url': username_api.get_profile_url(website, user),
+        'avatar': username_api.get_avatar(website, user) if status == 200
+                                                          else None
     }
 
 class TestUsernameApi(object):
@@ -337,7 +337,7 @@ def test_pinterest_format_checking(self):
         json_resp = json.loads(resp.get_data().decode())
         assert {
             'possible': False,
-            'url': 'https://in.pinterest.com/{}/'.format(invalid_username)
+            'url': 'https://in.pinterest.com/{}'.format(invalid_username)
         } == json_resp
 
     def test_instagram_format_checking(self):
@@ -364,5 +364,5 @@ def test_facebook_format_checking(self):
         json_resp = json.loads(resp.get_data().decode())
         assert {
             'possible': False,
-            'url': 'https://mbasic.facebook.com/{}/'.format(invalid_username)
+            'url': 'https://mbasic.facebook.com/{}'.format(invalid_username)
         } == json_resp
diff --git a/username_api.py b/username_api.py
index 1099040..08065c3 100644
--- a/username_api.py
+++ b/username_api.py
@@ -1,7 +1,9 @@
 from flask import Flask, jsonify
 from flask.ext.cors import CORS, cross_origin
 import requests as r
+from bs4 import BeautifulSoup
 
+import os
 import sys
 import re
 import yaml
@@ -12,12 +14,70 @@ patterns = yaml.load(open('websites.yml'))
 
 
 
-def check_username(website, username):
-    url = patterns['urls'].get(website, 'https://{w}.com/{u}').format(
+def get_profile_url(website, username):
+    """Return the profile URL of `username` on `website`.
+
+    Uses the site's template from the `urls` section of websites.yml,
+    falling back to `https://{website}.com/{username}`.
+    """
+    return patterns['urls'].get(website, 'https://{w}.com/{u}').format(
         w=website,
         u=username
     )
 
+def get_avatar(website, username):
+    """Return the avatar URL of `username` on `website`, or None.
+
+    The strategy comes from the `avatar` section of websites.yml: a
+    false or missing entry disables the lookup, `opengraph` reads the
+    `og:image` meta tag of the profile page, a `key` entry searches the
+    page for `"key": "link"`, and a `url` entry is fetched directly and
+    returned when it serves an image.
+    """
+    # .get(): a website without an avatar entry has no avatar support.
+    data = patterns['avatar'].get(website)
+
+    if not data:
+        return None
+
+    url = get_profile_url(website, username)
+    if 'url' in data:
+        url = data['url'].format(u=username)
+
+    response = r.get(url)
+    if response.status_code == 404:
+        return None
+
+    if data == 'opengraph':
+        # Look in metadata for image.
+        soup = BeautifulSoup(response.text, 'html.parser')
+        result = [item.attrs.get('content') for item in soup('meta')
+                  if item.has_attr('property') and
+                  item.attrs['property'].lower() == 'og:image']
+        if not result or not result[0]:
+            return None
+        result = result[0]
+    elif 'key' in data:
+        # Searches for "`key`": "`link`"; the link stops at the first quote.
+        regex = re.compile(r'[\'\"]' + re.escape(data['key']) +
+                           r'[\'\"]:(\s)?[\'\"](?P<link>[^\s\'\"]+)[\'\"]')
+        result = re.search(regex, response.text)
+        if not result:
+            return None
+        result = result.group('link')
+    elif response.headers.get('content-type', '').startswith('image/'):
+        return url
+    else:
+        return None
+
+    # Fix relative links by resolving them against the fetched page.
+    if result.startswith('/'):
+        result = r.compat.urljoin(response.url, result)
+    return result
+
+def check_username(website, username):
+    url = get_profile_url(website, username)
+
     possible = check_format(website, username)
 
     if not possible:
@@ -34,6 +94,8 @@ def check_username(website, username):
         return {
             'status': code,
             'url': url,
+            'avatar': get_avatar(website, username) if code == 200
+                                                    else None,
             'possible': possible,
         }
 
@@ -54,6 +116,7 @@ def check_username(website, username):
         return {
             'status': code,
             'url': url,
+            'avatar': get_avatar(website, username),
             'possible': possible,
             'profile': profile,
         }
@@ -62,6 +125,7 @@ def check_username(website, username):
         return {
             'status': r.get(url).status_code,
             'url': url,
+            'avatar': get_avatar(website, username),
             'possible': possible,
         }
 
diff --git a/websites.yml b/websites.yml
index fe80ba7..e5468eb 100644
--- a/websites.yml
+++ b/websites.yml
@@ -1,9 +1,9 @@
 ---
 urls:
   behance: https://{w}.net/{u}
-  pinterest: https://in.{w}.com/{u}/
+  pinterest: https://in.{w}.com/{u}
   tumblr: https://{u}.{w}.com
-  facebook: https://mbasic.{w}.com/{u}/
+  facebook: https://mbasic.{w}.com/{u}
 username_patterns:
   pinterest:
     characters: a-zA-Z0-9_
@@ -46,3 +46,15 @@ username_patterns:
     invalid_patterns:
       - "\\.(com|net)"
       - "(\\.)\\1{1,}" # consecutive '.' not allowed
+avatar:
+  behance: false
+  facebook: opengraph
+  github: opengraph
+  gitlab: opengraph
+  instagram: opengraph
+  pinterest:
+    key: image_xlarge_url
+  soundcloud: opengraph
+  tumblr: opengraph
+  twitter:
+    url: https://twitter.com/{u}/profile_image?size=original