Le site ayant changé, voici une nouvelle version :
#!/usr/bin/python
#-*- coding:utf-8 -*-
from urllib2 import urlopen
import sys
from BeautifulSoup import BeautifulSoup
# Version string of this script.
__version__ = '0.1'

# Integer codes for the proxy categories. get_proxy() sorts its result on
# these codes, so a lower code is listed first.
cat_elite, cat_transp, cat_anon, cat_other = range(4)
def get_valid_countries():
    """Return the country values offered by the proxynova.com country selector.

    The values are read from the ``value`` attribute of every ``<option>``
    inside the ``<select name="proxy_country">`` element of a listing page.
    Options without a ``value`` attribute, or with an empty one, are skipped.
    Presumably these are the codes accepted by ``get_proxy`` -- confirm
    against the live page.

    Returns:
        set: the non-empty option values found on the page.

    Raises:
        AttributeError: if the page contains no ``proxy_country`` select
            (``soup.find`` returns None in that case).
    """
    # Any country page carries the full country list; the "gb" page is used.
    # (The original lower-cased an undefined ``country`` variable here, which
    # raised UnboundLocalError on every call; the URL is fixed, so no such
    # variable is needed.)
    html = urlopen('http://www.proxynova.com/proxy-server-list/country-gb/').read()
    soup = BeautifulSoup(html)
    select = soup.find('select', attrs={'name': 'proxy_country'})

    res = set()
    for opt in select.findAll('option'):
        if opt.has_key('value') and opt['value'] != u'':
            res.add(opt['value'])
    return res
def get_proxy(country):
    """Scrape the proxynova.com proxy list for one country.

    Args:
        country: country code inserted (lower-cased) into the listing URL,
            e.g. ``'ca'``.

    Returns:
        list: ``(category, u'ip:port')`` tuples, sorted by category code and
        then by address string. ``category`` is one of ``cat_elite``,
        ``cat_transp``, ``cat_anon`` or ``cat_other``.

    Rows whose address or port cannot be extracted are reported with
    ``print`` and left out of the result.
    """
    country = country.lower()
    html = urlopen('http://www.proxynova.com/proxy-server-list/country-' + country + '/').read()
    soup = BeautifulSoup(html)

    # Category label shown on the page -> numeric code; any other label
    # (including a missing one) maps to cat_other.
    categories = {
        u'Elite': cat_elite,
        u'Transparent': cat_transp,
        u'Anonymous': cat_anon,
    }

    rows = []
    for tr in soup.find('tbody').findAll('tr'):
        # Cells carrying a colspan attribute are not counted as data columns.
        cells = [td for td in tr.findAll('td') if not td.has_key('colspan')]
        # Column layout: ip, port, x, x, x, category
        if len(cells) < 6:
            continue
        # Index the cells per row instead of toggling a flag shared across
        # rows, so one malformed row cannot mis-pair ip and port afterwards.
        ip_cell, port_cell, cat_cell = cells[0], cells[1], cells[5]
        cat = categories.get(cat_cell.span.string, cat_other)
        rows.append((ip_cell.span.contents[0], port_cell, cat))

    lst = []
    for addr, port, category in rows:
        try:
            # Keep the text after the first '(' minus the last two
            # characters, then drop double quotes, spaces and '+' signs.
            # NOTE(review): sample output published with this script shows
            # truncated addresses for some countries, so this extraction
            # looks incomplete -- verify against the current page markup.
            v = addr.string.strip()
            v = v[v.find('(') + 1:-2]
            v = v.replace('"', '').replace(' ', '').replace('+', '')
            if port.a is not None:
                # The port is wrapped in a link.
                proxy = u"%s:%s" % (v, str(port.a.string))
            else:
                # Plain-text port: strip tabs and line breaks around it.
                vv = port.contents[0].replace('\t', '').replace('\n', '').replace('\r', '')
                proxy = u"%s:%s" % (v, vv)
            lst.append((category, proxy))
        # TODO: catch the specific exception instead of Exception.
        except Exception as e:
            # Best effort: report the row that failed and keep going.
            print(e)
    lst.sort()
    return lst
if __name__ == '__main__':
    # With no command-line argument, print the available countries;
    # otherwise print the proxies for the country given as first argument.
    args = sys.argv
    if len(args) != 1:
        print(get_proxy(args[1]))
    else:
        print(get_valid_countries())
Exemple :
python proxy.py ca
affiche des proxys canadiens, d'abord ceux de catégorie 0 (élite)
[(0, u'178.82.51.:80'), (0, u'213.144.132.1:80'), (0, u'31.11.3.:80'), (0, u'80.218.81.1:80'), (0, u'93.184.16.:81'), (2, u'46.28.206.:3128'), (2, u'62.202.16.:3128'), (2, u'81.13.132.1:8080'), (2, u'81.13.184.1:8080'), (2, u'81.13.186.:80'), (2, u'81.13.186.:8080'), (2, u'81.13.235.:80')]
Edit : ah, problème avec les proxys suisses, ça ne récupère pas l'adresse IP complète, je regarde...