forked from jhnwr/rotatingproxies
scrapeproxies.py
import requests
from bs4 import BeautifulSoup
import random
import concurrent.futures
# get the list of free elite proxies from free-proxy-list.net
def getProxies():
    r = requests.get('https://free-proxy-list.net/')
    soup = BeautifulSoup(r.content, 'html.parser')
    table = soup.find('tbody')
    proxies = []
    # iterate over the <tr> rows explicitly; iterating the tag itself can
    # also yield whitespace text nodes, which have no find_all()
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        # column 4 holds the anonymity level; keep only the elite proxies
        if cells[4].text == 'elite proxy':
            proxies.append(':'.join([cells[0].text, cells[1].text]))
    return proxies
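# getProxies() yields entries shaped like 'IP:PORT', for example
# '203.0.113.7:3128' (an illustrative address, not real scraped output)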
def extract(proxy):
    # left over from when the function took the whole list instead of
    # being mapped over it by concurrent.futures:
    # proxy = random.choice(proxylist)
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0'}
    try:
        # https://httpbin.org/ip just echoes the caller's IP and doesn't
        # block anything, so it's a convenient target for testing a proxy
        r = requests.get('https://httpbin.org/ip', headers=headers,
                         proxies={'http': proxy, 'https': proxy}, timeout=1)
        print(r.json(), r.status_code)
    except requests.exceptions.RequestException:
        # dead or slow proxies are simply skipped
        pass
    # note: the proxy is returned whether or not the check succeeded
    return proxy
proxylist = getProxies()
#print(len(proxylist))

# check them all concurrently; each check is network-bound, so a thread
# pool gets through the whole list quickly
with concurrent.futures.ThreadPoolExecutor() as executor:
    executor.map(extract, proxylist)
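
# executor.map also hands back extract's return values, so a small variant
# can keep only the proxies that actually responded and then rotate through
# them for real requests. A minimal sketch reusing the imports above:
# check() and the three-request loop are illustrative additions, not part
# of the original script.
def check(proxy):
    try:
        r = requests.get('https://httpbin.org/ip',
                         proxies={'http': proxy, 'https': proxy}, timeout=1)
        if r.status_code == 200:
            return proxy  # proxy answered, keep it
    except requests.exceptions.RequestException:
        pass
    return None  # dead or slow, drop it

with concurrent.futures.ThreadPoolExecutor() as executor:
    working = [p for p in executor.map(check, proxylist) if p]

# rotate: pick a random working proxy for each request
if working:
    for _ in range(3):
        proxy = random.choice(working)
        try:
            r = requests.get('https://httpbin.org/ip',
                             proxies={'http': proxy, 'https': proxy}, timeout=5)
            print(proxy, '->', r.json())
        except requests.exceptions.RequestException:
            print(proxy, 'failed mid-rotation')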