Files
jiangyuwei666 d8ae182023 web server done
2019-03-04 20:45:43 +08:00

51 lines
2.0 KiB
Python

import requests
from lxml import etree
import time
from requests.exceptions import RequestException
import random
# Root of the paginated free-proxy listing; the page number is appended to it.
base_url = "https://www.kuaidaili.com/free/inha/"

# Browser-like request headers sent with the HTTP requests in this module.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/52.0.2743.116 Safari/537.36'
    ),
}
def get_proxys(page):
    """Scrape proxies from the first ``page`` listing pages under ``base_url``.

    Pages are requested one at a time, with a 5-second pause before each
    request. For every page answering HTTP 200 the IP, port and type columns
    are extracted by XPath; the rows of a page are only used when all three
    columns have the same number of entries.

    Args:
        page: Number of listing pages to fetch, starting from page 1.

    Returns:
        A list of dicts shaped ``{'HTTP': 'ip:port', 'HTTPS': 'ip:port'}``.
        If a request raises ``RequestException`` the scrape stops early and
        the proxies collected so far are returned (possibly an empty list),
        so callers always receive a list.
    """
    proxy_list = []
    print("正在抓代理...")
    try:
        for page_no in range(1, page + 1):
            # Pause between pages to avoid hammering the listing site.
            time.sleep(5)
            url = base_url + str(page_no) + "/"
            r = requests.get(url, headers=headers, timeout=10)
            if r.status_code != 200:
                print(str(r.status_code) + "---" + r.url)
                continue
            s = etree.HTML(r.text)
            proxy_ip = s.xpath('//tbody//tr//td[@data-title="IP"]/text()')
            proxy_port = s.xpath('//tbody//tr//td[@data-title="PORT"]/text()')
            proxy_kind = s.xpath('//tbody//tr//td[@data-title="类型"]/text()')
            # Mismatched column lengths mean the table did not parse cleanly;
            # skip the page rather than pair an IP with the wrong port.
            if not (len(proxy_ip) == len(proxy_port) == len(proxy_kind)):
                continue
            for ip, port in zip(proxy_ip, proxy_port):
                address = ip + ":" + port
                # NOTE(review): requests looks proxies up by lowercase URL
                # scheme ('http'/'https'); the uppercase keys are kept here
                # because they are this function's existing return shape.
                proxy_list.append({
                    'HTTP': address,
                    'HTTPS': address,
                })
    except RequestException as e:
        print("代理error:", e)
        # Keep whatever was scraped before the failure instead of returning
        # None implicitly.
        return proxy_list
    print("代理完成")
    return proxy_list
def test_proxy(proxy_list, max_attempts=20):
    """Pick a random proxy from ``proxy_list`` and verify it with a test request.

    A proxy is chosen at random and used for a GET request to a Baidu search
    URL carrying a random query value. A response with status 200 accepts
    the proxy; any other status triggers another random pick.

    Args:
        proxy_list: Sequence of proxy dicts mapping a scheme name to an
            ``ip:port`` string (the shape produced by ``get_proxys``).
        max_attempts: Upper bound on the number of picks tried when responses
            keep coming back with a non-200 status.

    Returns:
        The accepted proxy dict exactly as it appears in ``proxy_list``, or
        ``None`` when picking/requesting raises (for example an empty list or
        a connection error) or when every attempt was rejected.
    """
    try:
        # Iterative retry: a successful later attempt is actually returned,
        # and repeated rejections cannot exhaust the call stack.
        for _ in range(max_attempts):
            print("正在选择代理...")
            proxy = random.choice(proxy_list)
            test_url = "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&tn=02049043_21_pg&wd=" + str(random.random())
            # requests matches proxy entries by lowercase URL scheme, so the
            # keys are normalised for the request only; the dict handed back
            # to the caller is left untouched.
            request_proxies = {
                scheme.lower(): address for scheme, address in proxy.items()
            }
            r = requests.get(test_url, proxies=request_proxies, headers=headers, timeout=10)
            if r.status_code == 200:
                print("代理选择成功")
                return proxy
            print("代理选择失败,重新选择")
    except Exception:
        # Best-effort check: report the failure instead of propagating it,
        # while still letting KeyboardInterrupt/SystemExit through.
        print("验证失败")
        return None
    # Every attempt was answered with a non-200 status.
    print("验证失败")
    return None