Files
jiangyuwei666 d8ae182023 web server done
2019-03-04 20:45:43 +08:00

86 lines
2.9 KiB
Python

from functions import get_selector, send_request
from requests import RequestException
import time
from functions import wtrte_csv
# Category slugs to crawl; each one is appended to base_url to build a listing
# URL and is also used as the output CSV file name.
# NOTE(review): presumably pinyin for breakfast / lunch / dinner - confirm.
ttt = ["zaocan", "wucan", "wancan"]
# URL prefix that a category slug from ttt is appended to.
base_url = "https://www.meishij.net/chufang/diy/"
def get_urls(menu_url, pages=11, delay=1):
    """
    Collect the dish URLs listed on the paginated pages of one recipe category.

    Pages are numbered from 1 and the page number is appended directly to
    ``menu_url``. A page that cannot be fetched or parsed is reported on
    stdout and skipped, so the returned list may be partial.

    :param menu_url: listing URL prefix that the page number is appended to
    :param pages: number of listing pages to visit (default 11)
    :param delay: seconds to pause before each request (default 1)
    :return: list with every ``href`` found on the visited pages
    """
    urls_list = []
    for page in range(1, pages + 1):
        # Throttle so the target site is not hit in a tight loop.
        time.sleep(delay)
        try:
            url = menu_url + str(page)
            print(url)
            s = get_selector.get_selector(send_request.send_requests(url))
            urls_list.extend(s.xpath('//div[@class="listtyle1"]//a/@href'))
            # Report the same 1-based page number that was requested above.
            print("page" + str(page), "done")
        except Exception as e:
            # Best effort: one broken page must not abort the whole crawl.
            print("打开失败", e)
    return urls_list
def _first(nodes, default=""):
    """Return the first XPath match, or ``default`` when nothing matched."""
    return nodes[0] if nodes else default


def _pair_up(items):
    """Join adjacent items of a flat list pairwise as "first:second" strings."""
    items = list(items)
    if len(items) % 2:
        # A trailing unpaired item is kept with an empty second half instead
        # of raising IndexError and discarding every ingredient.
        items.append("")
    return [left + ":" + right for left, right in zip(items[0::2], items[1::2])]


def get_info(url, delay=1):
    """
    Fetch one dish page and extract its details.

    Each field is extracted independently: a node missing from the page
    leaves only that field at its empty default instead of discarding the
    rest of the record. Any other failure (for example the request itself)
    is reported on stdout and the record collected so far is returned.

    :param url: URL of the dish page
    :param delay: seconds to pause before the request (default 1)
    :return: dict with the keys 菜名, 分类, 口味, 食材, 主要工艺, 制作时间,
             做法 and 图片url; 分类, 食材 and 做法 hold lists once parsed,
             every other value is a string
    """
    dic = {
        "菜名": "",
        "分类": "",
        "口味": "",
        "食材": "",
        "主要工艺": "",
        "制作时间": "",
        "做法": "",
        "图片url": "",
    }
    try:
        # Throttle so the target site is not hit in a tight loop.
        time.sleep(delay)
        print("open", url)
        s = get_selector.get_selector(send_request.send_requests(url))

        dic["图片url"] = _first(s.xpath('//div[@class="cp_headerimg_w"]//img/@src'))
        dic["菜名"] = _first(s.xpath('//h1[@class="title"]//a/text()'))
        dic["分类"] = s.xpath('//dl[@class="yj_tags clearfix"]//a/text()')
        dic["主要工艺"] = _first(s.xpath('//li[@class="w127"]//a/text()'))
        dic["口味"] = _first(s.xpath('//li[@class="w127 bb0"]//a/text()'))
        dic["制作时间"] = _first(
            s.xpath('//li[@class="w270 bb0 br0"]//div[@class="processing_w"]//a/text()')
        )

        # Main and auxiliary ingredients both come back as flat lists whose
        # adjacent entries belong together; main ingredients are listed first.
        zhuliao = s.xpath('//div[@class="c"]//h4/child::*/text()')
        fuliao = s.xpath(
            '//div[@class="yl fuliao clearfix"]//ul[@class="clearfix"]/descendant::*/text()'
        )
        dic["食材"] = _pair_up(zhuliao) + _pair_up(fuliao)

        # Prefix every step with its 1-based position.
        steps = s.xpath('//div[@class="editnew edit"]//div//p/text()')
        dic["做法"] = [str(n) + text for n, text in enumerate(steps, start=1)]

        print(dic.get("菜名"), "done")
    except Exception as e:
        # Best effort: a broken dish page must not abort the whole crawl.
        print("获取菜失败", e)
    return dic
if __name__ == '__main__':
    # Crawl every category in ttt and dump its dishes into one CSV per category.
    out_dir = r"E:\datapy\心食谱\\"
    for category in ttt:
        time.sleep(1)
        listing_prefix = "".join((base_url, category, "/?&page="))
        dish_urls = get_urls(listing_prefix)
        records = []
        for dish_url in dish_urls:
            # Pause between dish pages to keep the request rate low.
            time.sleep(1)
            record = get_info(dish_url)
            records.append(record)
        csv_target = f"{out_dir}{category}.csv"
        wtrte_csv.write_csv(records, csv_target)
        print(category, "done")