Files
NutritionMaster/NutritionMasterSpider/nutrition_add.py
jiangyuwei666 d8ae182023 web server done
2019-03-04 20:45:43 +08:00

76 lines
2.3 KiB
Python

from functions import send_request, get_selector
import csv
# Root of the site being scraped; relative result links are joined to it.
base_url = "http://www.boohee.com"
# Search endpoint; the food name is appended directly after "keyword=".
search_url = base_url + "/food/search?keyword="
# Sample food name(s) kept around for manual testing.
test_list = ["碘盐"]
def get_url(name):
    """Return the absolute URL of the first search result for ``name``.

    The page at ``search_url + name`` is fetched with
    ``send_request.send_requests`` and parsed with
    ``get_selector.get_selector``. The ``href`` of the link inside the first
    ``<li class="item clearfix">`` element is appended to ``base_url``.
    The resulting URL is also printed.

    :param name: food name, appended verbatim to the search URL
    :return: absolute URL string of the first result
    :raises IndexError: if the search page yields no matching link
    """
    s = get_selector.get_selector(send_request.send_requests(search_url + name))
    # Assumes the xpath call returns a list of href strings -- TODO confirm
    # against the project's get_selector helper.
    hrefs = s.xpath('//li[@class="item clearfix"][1]//h4//a/@href')
    if not hrefs:
        # Same exception type the unguarded [0] lookup produced, but with a
        # message that says which search term came back empty.
        raise IndexError("no search result found for %r" % (name,))
    url = base_url + hrefs[0]
    print(url)
    return url
def get_info(url):
    """Scrape label/value text pairs from the page at ``url``.

    The page is fetched with ``send_request.send_requests`` and parsed with
    ``get_selector.get_selector``. The selected ``<span>`` texts are treated
    as an alternating sequence ``label, value, label, value, ...``.

    :param url: address of the page to scrape
    :return: dict mapping each label to its value text; any value that
        ``float()`` cannot parse is replaced by the string ``'-1'``
    """
    s = get_selector.get_selector(send_request.send_requests(url))
    info = s.xpath('//div[@class="content"]//dd[2]/following::dd//span/text()')
    info_dic = {}
    # Pair even-indexed items (labels) with odd-indexed items (values).
    # zip() stops at the shorter slice, so a trailing unpaired label is dropped.
    for label, value in zip(info[::2], info[1::2]):
        try:
            float(value)
        except (TypeError, ValueError):
            # Non-numeric value: store the '-1' sentinel instead.
            value = '-1'
        info_dic[label] = value
    return info_dic
def read_csv(path, encoding=None):
    """Read a CSV file with a header row into a list of row dicts.

    :param path: path of the CSV file
    :param encoding: text encoding used to open the file; ``None`` (the
        default) keeps the platform default encoding
    :return: list with one dict per data row, keyed by the header columns
    """
    # newline='' hands line-ending handling to the csv module, so newlines
    # embedded in quoted fields are preserved unchanged.
    with open(path, 'r', newline='', encoding=encoding) as file:
        # NOTE(review): the original had a reminder comment on this return
        # ("remember this was changed") -- confirm nothing is still pending.
        return list(csv.DictReader(file))
def write_csv(path, old_dic, encoding=None):
    """Write row dicts to a CSV file with a fixed header.

    Rows that ``csv.DictWriter`` rejects (for example because they contain a
    key that is not one of the header columns) are skipped, and a message is
    printed for each skipped row so the loss is visible.

    :param path: path of the file to write (overwritten if it exists)
    :param old_dic: iterable of row dicts to write
    :param encoding: text encoding used to open the file; ``None`` (the
        default) keeps the platform default encoding
    :return: None
    """
    fieldnames = ['', 'material_name', 'name_correct', '镁(毫克)', '维生素A(微克)', '铁(毫克)', '硒(微克)', '钾(毫克)', '铜(毫克)', '磷(毫克)', '胡萝卜素(微克)', '碳水化合物(克)', '锰(毫克)', '钠(毫克)', '胆固醇(毫克)', '烟酸(毫克)', '热量(大卡)', '核黄素(毫克)', '锌(毫克)', '维生素C(毫克)', '硫胺素(毫克)', '脂肪(克)', '钙(毫克)', '维生素E(毫克)', '纤维素(克)', '蛋白质(克)']
    with open(path, 'w', newline='', encoding=encoding) as file:
        writer = csv.DictWriter(file, fieldnames)
        writer.writeheader()
        for row in old_dic:
            try:
                writer.writerow(row)
            except (ValueError, csv.Error) as e:
                # DictWriter raises ValueError for keys outside fieldnames.
                # Keep going (best effort) but report the dropped row.
                print("write_csv: skipped row %r: %s" % (row, e))
# Load the material list, enrich every row with scraped nutrition data, and
# write the combined rows back out.
m = read_csv(r"F:\Tencent\TIM\Tencent Files\739843128\FileRecv\material_names.csv")
for row in m:
    try:
        # Look the food up by its corrected name, then merge the scraped
        # label/value pairs into the row.
        food_url = get_url(row.get('name_correct'))
        nutrition = get_info(food_url)
        row.update(nutrition)
        # A disabled step here used to replace None values with "-1".
        print(row)
    except Exception as e:
        # Best effort: report the failure and continue with the next row.
        print(e)
write_csv(r"E:\datapy\test.csv", m)