Skip to content
This repository has been archived by the owner on Oct 28, 2023. It is now read-only.

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
fate0 committed Jun 16, 2017
0 parents commit cbaabcf
Show file tree
Hide file tree
Showing 17 changed files with 734 additions and 0 deletions.
63 changes: 63 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Byte-compiled / optimized / DLL files
.idea/
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# pyenv python configuration file
.python-version
32 changes: 32 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Config file for automatic testing at travis-ci.org
# This file will be regenerated if you run travis_pypi_setup.py

language: python
python: 3.5

env:
- TOXENV=py35
- TOXENV=py34
- TOXENV=py33
- TOXENV=py27
- TOXENV=py26
- TOXENV=pypy

# command to install dependencies, e.g. pip install -r requirements.txt
install: pip install -U tox

# command to run tests, e.g. python setup.py test
script: tox -e ${TOXENV}

# After you create the Github repo and add it to Travis, run the
# travis_pypi_setup.py script to finish PyPI deployment setup
deploy:
provider: pypi
distributions: sdist bdist_wheel
user: fate0
password:
secure: PLEASE_REPLACE_ME
on:
tags: true
repo: fate0/getproxy
condition: $TOXENV == py27
31 changes: 31 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

BSD License

Copyright (c) 2017, fate0
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.

1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include getproxy/data/GeoLite2-Country.mmdb
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# getproxy


8 changes: 8 additions & 0 deletions getproxy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-

# Package metadata for getproxy.
__author__ = """fate0"""
# NOTE(review): this value looks like a placeholder left behind by e-mail
# obfuscation rather than a real address -- confirm the intended contact.
__email__ = '[email protected]'
__version__ = '0.1.0'


# Re-export the main class so callers can write `from getproxy import GetProxy`.
from .getproxy import GetProxy
16 changes: 16 additions & 0 deletions getproxy/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-

import click
from getproxy import GetProxy


@click.command()
@click.option('--in-proxy', help='Input proxy file')
@click.option('--out-proxy', help='Output proxy file')
def main(in_proxy, out_proxy):
    # Command-line entry point: build a GetProxy from the two optional file
    # paths and run every step of its pipeline.
    #
    # This description is deliberately a comment and not a docstring: click
    # would publish a docstring as the command's --help text.
    GetProxy(in_proxy, out_proxy).start()


if __name__ == "__main__":
    main()
Binary file added getproxy/data/GeoLite2-Country.mmdb
Binary file not shown.
232 changes: 232 additions & 0 deletions getproxy/getproxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, absolute_import, division, print_function

# gevent has to patch the standard library *before* any module that binds
# socket/ssl/threading objects is imported.  Patching after `requests` has
# already pulled in `ssl` is unreliable (gevent warns about it and it can
# break HTTPS requests), so the patch is applied first and every other
# import follows it.
import gevent.monkey

gevent.monkey.patch_all()

import os  # noqa: E402
import sys  # noqa: E402
import json  # noqa: E402
import time  # noqa: E402
import copy  # noqa: E402
import signal  # noqa: E402
import logging  # noqa: E402

import requests  # noqa: E402
import gevent.pool  # noqa: E402
import geoip2.database  # noqa: E402

from .utils import signal_name, load_object  # noqa: E402


# Module-wide logger; the root logger is configured at INFO so the
# "[*] ..." progress messages are visible by default.
logger = logging.getLogger(__file__)
logging.basicConfig(level=logging.INFO)


class GetProxy(object):
    """Grab proxies from plugins, validate them and write out the survivors.

    The work is split into the named steps listed in ``self.steps``;
    :meth:`start` runs them in order and records the current one in
    ``self.cur_step`` so the signal handlers know how far the run has got.

    Validated proxies are plain dicts with the keys ``type``, ``host``,
    ``port``, ``anonymity``, ``country``, ``response_time`` and ``from``.
    """

    # Directory of this module; bundled data and plugins are located
    # relative to it.
    base_dir = os.path.dirname(os.path.realpath(__file__))

    def __init__(self, input_proxies_file=None, output_proxies_file=None):
        """Set up empty state.

        :param input_proxies_file: optional path of a file holding one JSON
            proxy object per line; skipped when missing.
        :param output_proxies_file: optional path to write the valid proxies
            to; when not given they are written to stdout.
        """
        self.pool = gevent.pool.Pool(500)
        self.plugins = []
        self.web_proxies = []
        self.valid_proxies = []
        self.input_proxies = []
        self.input_proxies_file = input_proxies_file
        self.output_proxies_file = output_proxies_file
        # "scheme://host:port" -> True for every proxy already checked.
        self.proxies_hash = {}
        self.origin_ip = None
        self.geoip_reader = None

        self.steps = [
            'init',
            'load_input_proxies',
            'validate_input_proxies',
            'load_plugins',
            'grab_web_proxies',
            'validate_web_proxies',
            'save_proxies'
        ]

        self.cur_step = None

    def _collect_result(self):
        """Append every plugin's non-empty ``result`` to ``web_proxies``."""
        for plugin in self.plugins:
            if not plugin.result:
                continue

            self.web_proxies.extend(plugin.result)

    def _lookup_country(self, host):
        """Return the GeoIP ISO country code for ``host``, or None.

        The lookup is best-effort enrichment: a host that is not an IP
        address, or one missing from the database, must not discard an
        otherwise working proxy, so any lookup failure yields None.
        """
        if self.geoip_reader is None:
            return None

        try:
            return self.geoip_reader.country(host).country.iso_code
        except Exception:
            return None

    def _validate_proxy(self, proxy, scheme='http'):
        """Check one proxy by fetching httpbin.org through it.

        :param proxy: dict with at least ``host`` and ``port``; ``country``
            and ``from`` are used when present.
        :param scheme: ``'http'`` or ``'https'``; selects both the proxy
            mapping key and the URL scheme requested.
        :returns: a result dict for a working proxy, or None when the entry
            is incomplete, was already checked, or the request failed.
        """
        country = proxy.get('country')
        host = proxy.get('host')
        port = proxy.get('port')

        # An entry without host or port can never work; skip the request.
        if not host or not port:
            return None

        proxy_hash = '%s://%s:%s' % (scheme, host, port)

        if proxy_hash in self.proxies_hash:
            return None

        self.proxies_hash[proxy_hash] = True
        request_proxies = {
            scheme: "%s:%s" % (host, port)
        }

        request_begin = time.time()
        try:
            response_json = requests.get(
                "%s://httpbin.org/get?show_env=1" % scheme,
                proxies=request_proxies,
                timeout=10
            ).json()
        except Exception:
            # Any request/decoding failure means the proxy is unusable.
            # ``Exception`` (not a bare except) lets GreenletExit and
            # SystemExit propagate when the pool is killed or the process
            # is shutting down.
            return None

        request_end = time.time()

        # A misbehaving proxy may answer with valid JSON that is not the
        # expected object.
        if not isinstance(response_json, dict):
            return None

        anonymity = self._check_proxy_anonymity(response_json)

        if not country:
            country = self._lookup_country(host)

        return {
            "type": scheme,
            "host": host,
            "port": port,
            "anonymity": anonymity,
            "country": country,
            "response_time": request_end - request_begin,
            "from": proxy.get('from')
        }

    def _validate_proxy_list(self, proxies, timeout=300):
        """Validate ``proxies`` concurrently for both http and https.

        :param proxies: iterable of proxy dicts.
        :param timeout: seconds to wait for the pool before the remaining
            checks are killed.
        :returns: list of result dicts for the proxies that worked.
        """
        valid_proxies = []

        def save_result(p):
            if p:
                valid_proxies.append(p)

        for proxy in proxies:
            self.pool.apply_async(self._validate_proxy, args=(proxy, 'http'), callback=save_result)
            self.pool.apply_async(self._validate_proxy, args=(proxy, 'https'), callback=save_result)

        self.pool.join(timeout=timeout)
        self.pool.kill()

        return valid_proxies

    def _check_proxy_anonymity(self, response):
        """Classify a proxy from the httpbin response fetched through it.

        :param response: decoded JSON object returned by httpbin.
        :returns: ``'transparent'`` when our own address shows up anywhere
            in the response, ``'anonymous'`` when a ``Via`` header other
            than ``"1.1 vegur"`` is present, otherwise ``'high_anonymous'``.
        """
        via = response.get('headers', {}).get('Via', '')
        body = json.dumps(response)

        # ``origin_ip`` may be unset/empty (an empty string is a substring
        # of everything, which would flag every proxy as transparent) or a
        # comma-separated list of addresses; test each real address.
        origin_ips = [ip.strip() for ip in (self.origin_ip or '').split(',')]

        if any(ip and ip in body for ip in origin_ips):
            return 'transparent'
        elif via and via != "1.1 vegur":
            return 'anonymous'
        else:
            return 'high_anonymous'

    def _request_force_stop(self, signum, _):
        """Second-signal handler: save what we have and exit immediately."""
        logger.warning("Cold shut down")
        self.save_proxies()

        raise SystemExit()

    def _request_stop(self, signum, _):
        """First-signal handler: stop gracefully.

        Re-registers SIGINT/SIGTERM so a second signal forces a cold
        shutdown.  Before the grab step the current results are saved and
        the process exits; during the grab step the running plugins are
        killed, their results validated and saved, then the process exits.
        In later steps the handler returns and the pipeline runs to the end.
        """
        logger.debug("Got signal %s", signal_name(signum))

        signal.signal(signal.SIGINT, self._request_force_stop)
        signal.signal(signal.SIGTERM, self._request_force_stop)

        logger.warning("Stopping after validate all ip. "
                       "Press Ctrl+C again for a cold shutdown.")

        cur_index = self.steps.index(self.cur_step)
        grab_index = self.steps.index('grab_web_proxies')

        if cur_index < grab_index:
            self.save_proxies()
            raise SystemExit()
        elif cur_index == grab_index:
            self.pool.kill()
            self._collect_result()
            self.validate_web_proxies()
            self.save_proxies()
            raise SystemExit()

    def init(self):
        """Install signal handlers, find our own IP and open the GeoIP db."""
        logger.info("[*] Init")
        signal.signal(signal.SIGINT, self._request_stop)
        signal.signal(signal.SIGTERM, self._request_stop)

        # Bounded wait: without a timeout an unresponsive httpbin would
        # hang the whole run before it starts.
        rp = requests.get('http://httpbin.org/get', timeout=10)
        self.origin_ip = rp.json().get('origin', '')
        logger.info("[*] Current Ip Address: %s", self.origin_ip)

        self.geoip_reader = geoip2.database.Reader(
            os.path.join(self.base_dir, 'data', 'GeoLite2-Country.mmdb'))

    def load_input_proxies(self):
        """Read previously saved proxies (one JSON object per line).

        Blank lines are ignored and lines that are not a JSON object are
        skipped with a warning, so one bad line does not abort the run.
        """
        logger.info("[*] Load input proxies")

        if not (self.input_proxies_file and os.path.exists(self.input_proxies_file)):
            return

        with open(self.input_proxies_file) as fd:
            for line in fd:
                line = line.strip()
                if not line:
                    continue

                try:
                    item = json.loads(line)
                except ValueError:
                    logger.warning("[-] Skip invalid input proxy line: %s", line)
                    continue

                if not isinstance(item, dict):
                    logger.warning("[-] Skip invalid input proxy line: %s", line)
                    continue

                self.input_proxies.append(item)

    def validate_input_proxies(self):
        """Validate the proxies loaded from the input file."""
        logger.info("[*] Validate input proxies")
        self.valid_proxies = self._validate_proxy_list(self.input_proxies)

    def load_plugins(self):
        """Instantiate the ``Proxy`` class of every module in ``plugin/``.

        Each instance receives its own deep copy of the currently valid
        proxies in ``proxies``.  A plugin that fails to import or to
        construct is logged and skipped.
        """
        logger.info("[*] Load plugins")
        for plugin_name in os.listdir(os.path.join(self.base_dir, 'plugin')):
            module_name, ext = os.path.splitext(plugin_name)
            if ext != '.py' or plugin_name == '__init__.py':
                continue

            try:
                cls = load_object("getproxy.plugin.%s.Proxy" % module_name)
                inst = cls()
            except Exception as e:
                logger.info("[-] Load Plugin %s error: %s", plugin_name, str(e))
                continue

            inst.proxies = copy.deepcopy(self.valid_proxies)
            self.plugins.append(inst)

    def grab_web_proxies(self):
        """Run every plugin's ``start`` in the pool, then gather results.

        Plugins still running after eight minutes are killed.
        """
        logger.info("[*] Grab proxies")

        for plugin in self.plugins:
            self.pool.spawn(plugin.start)

        self.pool.join(timeout=8 * 60)
        self.pool.kill()

        self._collect_result()

    def validate_web_proxies(self):
        """Validate the proxies gathered from plugins and keep the good ones."""
        logger.info("[*] Validate web proxies")
        valid_proxies = self._validate_proxy_list(self.web_proxies)
        self.valid_proxies.extend(valid_proxies)

    def _write_proxies(self, outfile):
        """Write each valid proxy to ``outfile`` as one JSON line and flush."""
        for item in self.valid_proxies:
            outfile.write("%s\n" % json.dumps(item))

        outfile.flush()

    def save_proxies(self):
        """Write the valid proxies to the output file, or stdout if unset."""
        logger.info("[*] Got %s valid proxies", len(self.valid_proxies))
        if self.output_proxies_file:
            # ``with`` guarantees the file is closed (and fully written)
            # even when we are called from a signal handler about to exit.
            with open(self.output_proxies_file, 'w') as outfile:
                self._write_proxies(outfile)
        else:
            self._write_proxies(sys.stdout)

    def start(self):
        """Run every step in ``self.steps`` in order."""
        for step in self.steps:
            self.cur_step = step
            getattr(self, self.cur_step)()


if __name__ == '__main__':
    # Script entry point: run the full pipeline with no input file; the
    # valid proxies are written to stdout.
    GetProxy().start()
Empty file added getproxy/plugin/__init__.py
Empty file.
Loading

0 comments on commit cbaabcf

Please sign in to comment.