【Python xpath爬虫实战】抓取下厨房的菜谱信息,并存储到MySQL数据库
获取标题、详情页链接、原材料列表、七天内做过人数、作者字段。获取全部的本周最受欢迎菜谱信息。数据需要存入 MySQL。
·
获取全部的本周最受欢迎菜谱信息
获取标题,详情页链接,原材料列表,七天内做过人数,作者字段
数据需要存入mysql
import requests
from lxml import etree
import pymysql
# Keyword arguments handed to pymysql.connect() when the script opens its
# database connection.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment before sharing or deploying this script.
db_config = {
    "host": "127.0.0.1",  # database server address
    "user": "root",  # database user name
    "password": "123456",  # database password
    "database": "cai",  # database (schema) name
    "charset": "utf8mb4",
    "cursorclass": pymysql.cursors.DictCursor,
}
# Listing page that the script downloads and scrapes.
url = 'https://www.xiachufang.com/explore/'

# Request headers: a desktop Chrome User-Agent string.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'
    ),
}

# The same proxy endpoint is registered for both the "http" and "https" keys.
# NOTE(review): the proxy user name and password are embedded in the URL;
# consider moving them out of source control.
proxies = dict.fromkeys(
    ("http", "https"),
    "http://d2488327798:i90h4h62@114.232.109.141:15943",
)
# --- Fetch -----------------------------------------------------------------
# Download the listing page through the configured proxy. The timeout keeps
# the script from blocking forever on an unresponsive proxy or server, and
# raise_for_status() stops an HTTP error page from being parsed as if it
# were the recipe list.
response = requests.get(url=url, headers=headers, proxies=proxies, timeout=10)
response.raise_for_status()
content = response.content.decode()

# --- Parse -----------------------------------------------------------------
tree = etree.HTML(content)
# Grab every <li> of the recipe list.
# NOTE(review): this absolute XPath depends on the exact page layout; if the
# site markup changes it will silently match nothing.
li_list = tree.xpath('/html/body/div[3]/div/div/div[1]/div[1]/div/div[2]/div[1]/ul/li')

# Extract all rows before opening the database connection, so a parsing
# failure cannot leave a connection open.
rows = []
for li in li_list:
    name = ''.join(li.xpath('./div/a/div/img/@alt')).strip()
    href = ''.join(li.xpath('./div/div/p[1]/a/@href')).strip()
    if not name and not href:
        # Neither a title nor a detail link: nothing worth storing.
        continue
    # Only prefix the site root when a link was actually found; otherwise the
    # stored value would be a misleading bare home-page URL.
    src = 'https://www.xiachufang.com' + href if href else ''
    # Text nodes may carry surrounding whitespace from the markup; trim each
    # piece and drop pieces that are whitespace only.
    material = ','.join(
        text.strip()
        for text in li.xpath('./div/div/p[2]/a/text()')
        if text.strip()
    )
    count = ''.join(li.xpath('./div/div/p[3]/span/text()')).strip()
    author = ''.join(li.xpath('./div/div/p[4]//a/text()')).strip()
    print(src)
    rows.append((name, src, material, count, author))

# --- Store -----------------------------------------------------------------
# Connect to the database.
connection = pymysql.connect(**db_config)
try:
    with connection.cursor() as cursor:
        # Insert the rows with a parameterized statement.
        # NOTE(review): the table name "deatil" looks like a typo of "detail";
        # it is kept as-is because it must match the existing schema.
        sql = "INSERT INTO deatil (name, src, material, count, author) VALUES (%s, %s, %s, %s, %s)"
        if rows:
            cursor.executemany(sql, rows)
    # Commit the transaction once all rows have been sent.
    connection.commit()
finally:
    # Always release the connection, even if the insert or commit failed.
    connection.close()
更多推荐
已为社区贡献1条内容
所有评论(0)