# File-viewer metadata captured with this listing: 93 lines, 2.9 KiB, Python
from selenium import webdriver
|
|
from functions import send_request, get_selector
|
|
import time
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
import os
|
|
|
|
# Site root; page paths and relative hrefs scraped below are appended to it.
base_url = "http://www.shufabeitie.com"
|
|
# Page names in the form get_urls expects (each is appended to
# base_url + "/shufa/"). Not referenced elsewhere in the visible code.
person_name_list = ["东晋王羲之", "元代赵孟頫", "明代文徵明", "唐代颜真卿"]
|
|
# Single-name list that the script's top-level call passes to get_urls.
test_name_list = ["东晋王羲之"]
|
|
|
|
def get_urls(list):
    """Collect the absolute URLs of all copybook pages linked from each name's page.

    For every name, the page ``base_url + "/shufa/" + name`` is fetched and the
    ``href`` of each link found under a ``div`` with class
    ``caption ellipsis`` is prefixed with ``base_url``.

    :param list: iterable of page names (e.g. ``person_name_list``). The
        parameter shadows the builtin ``list``; the name is kept so existing
        callers that pass it by keyword keep working.
    :return: list of absolute URLs gathered from every name that could be
        processed. A name whose processing raises is reported with ``print``
        and skipped.
    """
    urls = []
    for name in list:
        try:
            # Fixed pause before each request (presumably to throttle the
            # crawl -- the delay is kept exactly as in the original).
            time.sleep(5)
            url = base_url + "/shufa/" + name
            print(name, "start")
            s = get_selector.get_selector(send_request.send_requests(url))
            hrefs = s.xpath('//div[@class="caption ellipsis"]//a/@href')
            # Build the absolute URLs in a new list instead of rewriting the
            # xpath result in place through an index loop.
            absolute = [base_url + href for href in hrefs]
            print(name, "done")
            urls.extend(absolute)
        except Exception as e:
            # Best effort: one failing name must not abort the whole crawl.
            print(e)
    print(urls)
    return urls
|
|
|
|
def get_img_element_id(url):
    """Return the ``id`` attributes of the image wrapper divs on a page.

    The page at ``url`` is fetched and every ``div`` nested inside the element
    whose id is ``beitie-imgs-container`` contributes its ``id`` attribute.

    :param url: address of the page to fetch.
    :return: the ids selected by the xpath query, or an empty list when
        fetching or parsing fails (the error is printed). Returning an empty
        list instead of the implicit ``None`` keeps callers that iterate over
        the result from crashing.
    """
    try:
        s = get_selector.get_selector(send_request.send_requests(url))
        return s.xpath('//div[@id="beitie-imgs-container"]//div/@id')
    except Exception as e:
        print(e)
        return []
|
|
|
|
def get_info(urls):
    """Walk a copybook page in Chrome, collect its image URLs and save them.

    For each processed page the image wrapper ids are obtained with
    ``get_img_element_id``; for every id the ``src`` of the ``img`` inside
    that wrapper is read once it becomes visible, then the "next" pagination
    button is clicked. The collected image URLs are passed to ``write_in``.

    :param urls: sliceable sequence of page addresses. Only the first entry
        is processed (the original code slices ``urls[:1]``; presumably a
        debugging limit -- confirm before widening it).
    :return: None
    """
    targets = urls[:1]
    if not targets:
        # Nothing to process: do not launch a browser at all.
        return

    browser = webdriver.Chrome()
    try:
        wait = WebDriverWait(browser, 10)
        for url in targets:
            time.sleep(2)
            browser.get(url)
            print("正在处理", url)
            btn = wait.until(
                EC.element_to_be_clickable((By.ID, "beitie-pagination-next")))
            # Tolerate a helper that returns None on failure.
            id_list = get_img_element_id(url) or []
            img_list = []
            for img_id in id_list:
                try:
                    time.sleep(3)
                    img_element = wait.until(
                        EC.visibility_of_element_located(
                            (By.XPATH, '//div[@id="{id}"]/img'.format(id=img_id))))
                    img = img_element.get_attribute('src')
                    print(img)
                    img_list.append(img)
                    time.sleep(3)
                    # Advance to the next image before the next lookup.
                    btn.click()
                except Exception as e:
                    # Best effort: report the failing image and keep going.
                    print("下载图片出错", e)
            # write_in indexes the first URL, so skip it when nothing was found.
            if img_list:
                write_in(img_list)
    finally:
        # Always shut the driver down so no Chrome process is left behind.
        browser.quit()
|
|
|
|
def create_dir(name, base_dir=r"E:\datapy\bb\东晋王羲之"):
    """Ensure the directory ``base_dir/name`` exists and return its path.

    Unlike a plain ``os.mkdir``, calling this again for an existing directory
    is not an error, and missing parent directories are created as well.

    :param name: name of the sub-directory to create.
    :param base_dir: directory under which ``name`` is created; defaults to
        the location that was previously hard-coded.
    :return: path of the (possibly pre-existing) directory.
    """
    dir_path = os.path.join(base_dir, name)
    os.makedirs(dir_path, exist_ok=True)
    return dir_path
|
|
|
|
|
|
def write_in(urls):
    """Download every image URL into a directory named after the first URL.

    The directory name is the fifth ``/``-separated segment of ``urls[0]``;
    each file is named after the last segment of its URL with ``.jpg``
    appended.

    :param urls: list of image URLs; an empty list is a no-op.
    :return: None
    """
    if not urls:
        return
    dir_name = urls[0].split('/')[4]
    # Create the target directory once; repeating the call for every file
    # (as the original loop did) fails as soon as the directory exists.
    path = create_dir(dir_name)
    for url in urls:
        file_name = os.path.join(path, url.split('/')[-1] + '.jpg')
        # Fetch before opening so a failed download leaves no empty file.
        data = send_request.download_img(url)
        with open(file_name, 'wb') as file:
            file.write(data)
|
|
|
|
# Runs at module load (there is no __main__ guard): collects the copybook
# URLs for test_name_list and hands them to get_info.
get_info(get_urls(test_name_list))
|