python爬虫练习#动态数据翻页爬取
【代码】python爬虫练习#动态数据翻页爬取。
·
案例需求:
- 采集沪深京A股的全部行情数据(逐页翻页),并存储到MongoDB中
代码实现:
- 导入需要用到的模块
import time

import pymongo
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
- 使用selenium爬取动态数据
# Quote-list page to scrape (the `#hs_a_board` board view).
url = "http://quote.eastmoney.com/center/gridlist.html#hs_a_board"

# Start an Edge session and load the page. The implicit wait is set after
# navigation, so it only affects the element lookups that follow.
driver = webdriver.Edge()
driver.get(url)
driver.implicitly_wait(10)

# MongoDB on localhost:27017. Despite its name, `db` is bound to the `china`
# collection inside the `money` database, so `db.insert_one(...)` writes
# documents straight into that collection.
client = pymongo.MongoClient(host="localhost", port=27017)
db = client["money"]["china"]
def find_elements(xpath):
    """Return all elements on the current page that match an XPath.

    The lookup goes through the module-level ``driver``.

    Args:
        xpath: XPath expression to evaluate against the loaded page.

    Returns:
        Whatever ``driver.find_elements`` returns for that expression.
    """
    matches = driver.find_elements(by=By.XPATH, value=xpath)
    return matches
# Scrape every page of the quote table into MongoDB, then release the browser
# session and the database connection.

TABLE_ROWS_XPATH = '//*[@id="table_wrapper-table"]/tbody/tr'
FIRST_SERIAL_XPATH = TABLE_ROWS_XPATH + '[1]/td[1]'
NEXT_BUTTON_XPATH = '//*[@id="main-table_paginate"]/a[2]'
# Number of leading <td> cells a data row must have (columns 1..18 are read).
EXPECTED_CELLS = 18


def _row_to_document(row):
    """Build one MongoDB document from a single table row.

    Cells are read relative to the row, so a row that lacks a value can never
    shift the columns of the rows after it (which is what happens when each
    column is collected page-wide and the lists are indexed in parallel).

    Args:
        row: the ``<tr>`` WebElement to convert.

    Returns:
        The document dict, or ``None`` when the row does not have the
        expected cells/links and should be skipped.
    """
    cells = row.find_elements(By.XPATH, './td')
    if len(cells) < EXPECTED_CELLS:
        return None
    links = cells[3].find_elements(By.XPATH, './a')
    if len(links) < 3:
        return None
    # A cell's text is the text of whatever it wraps (<a>/<span>), so reading
    # the <td> also works for rows where the inner tag is absent.
    text = [cell.text for cell in cells[:EXPECTED_CELLS]]
    return {
        '序号': text[0],
        '代码': text[1],
        '名称': text[2],
        '股吧': links[0].get_attribute('href'),
        '资金流': links[1].get_attribute('href'),
        '数据': links[2].get_attribute('href'),
        '最新价': text[4],
        '涨跌幅': text[5],
        '涨跌额': text[6],
        '成交量(手)': text[7],
        '成交额': text[8],
        '振幅': text[9],
        '最高': text[10],
        '最低': text[11],
        '今开': text[12],
        '昨收': text[13],
        '量比': text[14],
        '换手率': text[15],
        '市盈率(动态)': text[16],
        '市净率': text[17],
    }


def _first_serial_on_page():
    """Return the serial-number text of the first table row, or None."""
    cells = find_elements(FIRST_SERIAL_XPATH)
    return cells[0].text if cells else None


def _page_advanced(previous_serial):
    """Tell whether the table now shows a different page than before."""
    current = _first_serial_on_page()
    return bool(current) and current != previous_serial


try:
    # One waiter for the whole crawl. Stale references are ignored while
    # polling because the table may be re-rendered between two polls.
    wait = WebDriverWait(
        driver, 10, ignored_exceptions=(StaleElementReferenceException,)
    )
    while True:
        rows = find_elements(TABLE_ROWS_XPATH)
        if not rows:
            print('未找到任何数据行,停止采集')
            break
        # Remembered so we can tell when the next page has really loaded.
        first_serial = _first_serial_on_page()

        for row in rows:
            data = _row_to_document(row)
            if data is None:
                print(f"跳过列数不足的行: {row.text!r}")
                continue
            db.insert_one(data)
            print(data)

        # Move to the next page, or stop when there is none.
        try:
            next_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, NEXT_BUTTON_XPATH))
            )
            # NOTE(review): assumes the pager marks the last page by adding a
            # "disabled" class to the next link - confirm against the live
            # page. If it does not, the serial-number wait below still ends
            # the loop once a click no longer changes the table.
            css_classes = (next_button.get_attribute('class') or '').split()
            if 'disabled' in css_classes:
                print('已到达最后一页')
                break
            next_button.click()
            # Wait for the table content to change instead of sleeping a
            # fixed second, so a slow refresh cannot cause the old page to be
            # scraped (and inserted) a second time.
            wait.until(lambda _driver: _page_advanced(first_serial))
        except Exception as e:
            print(f"翻页操作失败或已到达最后一页: {e}")
            break
except Exception as e:
    print(f"主循环异常: {e}")
finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window and leave the driver process running.
    driver.quit()
    client.close()
实现结果
更多推荐
已为社区贡献3条内容
所有评论(0)