HTTP=221.10.126.191:2226
HTTP=124.88.67.20:80
HTTP=119.6.136.122:80
HTTP=121.31.192.165:8123
HTTP=183.61.71.112:8888
HTTP=124.88.67.19:80
HTTP=121.193.143.249:80
HTTP=121.31.100.166:8123
HTTP=60.251.63.159:8080
HTTP=118.180.15.152:8102
HTTP=120.25.171.183:8080
HTTP=101.201.235.141:8000
HTTP=123.57.190.51:7777
HTTP=183.129.178.14:8080
HTTP=180.103.131.65:808
HTTP=202.75.210.45:7777
HTTP=124.88.67.23:80
HTTP=112.117.188.27:8118
HTTP=121.33.226.167:3128
HTTP=183.131.76.27:8888
HTTP=218.244.149.184:8888
HTTP=175.0.84.71:8998
HTTP=182.90.252.10:2226
HTTP=39.1.47.7:8080
# -*- coding: utf-8 -*-
import httplib
import time
import urllib
import threading
import urllib2
import urllib2 as url
import random
import httplib
import urllib
from bs4 import BeautifulSoup
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
user_agents = ['Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20130406 Firefox/23.0', \
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0', \
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533+ \
(KHTML, like Gecko) Element Browser 5.0', \
'IBM WebExplorer /v0.94', 'Galaxy/1.0 [en] (Mac OS X 10.5.6; U; en)', \
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)', \
'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14', \
'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) \
Version/6.0 Mobile/10A5355d Safari/8536.25', \
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) \
Chrome/28.0.1468.0 Safari/537.36', \
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0; TheWorld)']
def get_proxy():
pf = open('proxy.txt' , 'w')
for page in range(1, 200):
url = '<http://www.xici.net.co/nn/'>; + str(page)
request = urllib2.Request(url)
user_agent = random.choice(user_agents)
request.add_header('User-agent', user_agent)
response = urllib2.urlopen(request)
html = response.read()
soup = BeautifulSoup(html,'lxml')
trs = soup.find('table', id='ip_list').find_all('tr')
for tr in trs[1:]:
tds = tr.find_all('td')
ip = tds[1].text.strip()
port = tds[2].text.strip()
protocol = tds[5].text.strip()
if protocol == 'HTTP':
pf.write('%s=%s:%s\n' % (protocol, ip, port) )
print '%s=%s:%s' % (protocol, ip, port)
pf.close()
def var_proxy():
inFile = open('proxy.txt', 'r')
outFile = open('available.txt', 'w')
lines = inFile.readlines()
inFile.close()
#for line in inFile.readlines():
def check(line):
protocol, proxy = line.strip().split('=')
ip, port = proxy.split(':')
headers = {'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': '',
'Referer':'[http://www.baidu.com'}](http://www.baidu.com%27%7D/)
try:
conn = httplib.HTTPConnection(proxy, timeout=3.0)
conn.request(method='GET', url='<http://1212.ip138.com/ic.asp',headers=headers>)
res = conn.getresponse()
html_doc = res.read()
if html_doc.find(ip)>0:
print proxy
outFile.write('%s\n' % (proxy) )
else:
print "error!"
except Exception, e:
print e
pass
pool = ThreadPool(20)
result = pool.map(check,lines)
pool.close()
pool.join()
outFile.close()
if **name** == '**main**':
print "getproxy 1"
print "varproxy 2"
flag = raw_input()
if flag == "1":
get_proxy()
elif flag == "2":
var_proxy()
else:
print "input error!"
点击收藏 | 0
关注 | 0
打赏