利用 Python 爬取飞猪信息_飞猪爬虫项目
import requests / from bs4 import BeautifulSoup / # 爬取飞猪IP免费代理 / class SpiderApp: / # 初始化属性 / def __init__(self): / # 地址、头部、请求对象、解析对象 / self.url = "https://www.feizhuip.com/?source=baidu&keyword=feizhuIP" / self.head = …
import requests
from bs4 import BeautifulSoup
# 爬取飞猪IP免费代理
class SpiderApp:
# 初始化属性
def __init__(self):
    """Set up the target URL, request headers, helper handles and result holders.

    Attributes set here:
        url: URL of the first-level page that ``sendReqPage1`` requests.
        head: HTTP request headers; only a desktop Chrome user-agent is set.
        req: The ``requests`` module, kept as the handle used to send requests.
        bs: The ``BeautifulSoup`` class, kept as the handle used to parse HTML.
        href_list: Empty list intended to hold the paths and titles from page 1.
        response: Most recent response; an empty string until a request is made.
    """
    # Address, headers, request handle, parser handle.
    self.url = "https://www.feizhuip.com/?source=baidu&keyword=feizhuIP"
    self.head = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36",
    }
    self.req = requests
    self.bs = BeautifulSoup
    # Stores the paths and titles taken from page 1.
    self.href_list = []
    # Response object (placeholder until the first request is sent).
    self.response = ""
# 请求第一级页面:获取二级页面的路径和标题
def sendReqPage1(self):
# 1 发起请求
self.response = self.req.get(url=self.url,headers=self.head)
# 2 查看状态
print("code-",self.response.status_code)
# 3 解析数据 解析器:html5lib需要安装
soup = self.bs(self.response.text,"html.parser")
title = soup.title.string
print("标题",title)
# 4 通过标签条件查询标签
# find_all div len=3 [index=2] select "p.list span.date"
span
原文链接:https://blog.csdn.net/chen_zan_yu_/article/details/106683049
更多推荐
所有评论(0)