获取全部的本周最受欢迎菜谱信息

本周最受欢迎菜谱_下厨房

需要获取的字段:标题、详情页链接、原材料列表、七天内做过人数、作者。

数据需要存入 MySQL 数据库。

import requests
from lxml import etree
import pymysql


# Keyword arguments for pymysql.connect().
# NOTE(review): the password is hard-coded here; consider loading it from the
# environment or a config file before sharing this script.
db_config = dict(
    host='127.0.0.1',  # database server address
    user='root',  # database user name
    password='123456',  # database password
    database='cai',  # database (schema) name
    charset='utf8mb4',
    # DictCursor makes fetched rows come back as dicts instead of tuples.
    cursorclass=pymysql.cursors.DictCursor,
)
# Listing page that is scraped.
url = 'https://www.xiachufang.com/explore/'

# Send a desktop-browser User-Agent with the request.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/128.0.0.0 Safari/537.36'
    ),
}

# The same HTTP proxy endpoint is used for both http and https traffic.
# NOTE(review): the proxy URL embeds a user name and password in source.
proxies = dict.fromkeys(
    ("http", "https"),
    "http://d2488327798:i90h4h62@114.232.109.141:15943",
)


# Fetch the listing page through the configured proxy.
# The timeout keeps a dead proxy or stalled server from hanging the script
# forever, and raise_for_status() stops an HTTP error page from being parsed
# as if it were recipe data.
response = requests.get(url=url, headers=headers, proxies=proxies, timeout=10)
response.raise_for_status()
content = response.content.decode()

tree = etree.HTML(content)

# Select every <li> of the recipe list (absolute path into the page layout).
li_list = tree.xpath('/html/body/div[3]/div/div/div[1]/div[1]/div/div[2]/div[1]/ul/li')

# Extract all rows before touching the database, so a parsing failure cannot
# leave a connection open or a transaction half written.
rows = []
for li in li_list:
    # Each xpath() call returns a list; join() collapses it to a single
    # string and yields '' when nothing matched.
    name = ''.join(li.xpath('./div/a/div/img/@alt'))
    src = 'https://www.xiachufang.com' + ''.join(li.xpath('./div/div/p[1]/a/@href'))
    material = ','.join(li.xpath('./div/div/p[2]/a/text()'))
    count = ''.join(li.xpath('./div/div/p[3]/span/text()'))
    author = ''.join(li.xpath('./div/div/p[4]//a/text()'))
    print(src)
    rows.append((name, src, material, count, author))

# Parameterized insert: the driver escapes the values.
sql = "INSERT INTO deatil (name, src, material, count, author) VALUES (%s, %s, %s, %s, %s)"

# Connect only once there is data to write; everything after the connect is
# covered by try/finally so the connection is always closed.
connection = pymysql.connect(**db_config)
try:
    with connection.cursor() as cursor:
        if rows:
            # Insert all scraped rows with a single executemany() call.
            cursor.executemany(sql, rows)
    # Commit once, after every row was accepted; if anything above raised,
    # the connection is closed without committing.
    connection.commit()
finally:
    connection.close()

Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐