python爬虫,58汽车信息爬取(selenium)
coding=utf-8“”"author:leifunction:“”"import timefrom selenium import webdriverimport jsonclass Car(object):def init(self):self.url = “https://sanhe.58.com/ershouche/”options = webdriver.ChromeOptions(
·
# coding=utf-8
"""
author: lei
function:
"""
import json
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
class Car(object):
    """Scrape used-car listings from 58.com with a Selenium-driven Chrome.

    Typical use is ``Car().run()``, which opens the listing URL, prints the
    data found on each result page and follows the "next" link until the
    link can no longer be found or clicked.
    """

    def __init__(self):
        """Store the start URL and launch Chrome from the configured paths."""
        self.url = "https://sanhe.58.com/ershouche/"
        options = webdriver.ChromeOptions()
        options.binary_location = r"D:\文件2\Google\Chrome\Application\chrome.exe"
        # NOTE(review): passing the chromedriver path positionally is the
        # Selenium 3 calling convention; newer Selenium releases expect a
        # Service object instead -- confirm against the installed version.
        self.webdriver = webdriver.Chrome(
            r"D:\文件\软件\chromedriver_win32\chromedriver.exe",
            options=options,
        )

    def parse_data(self):
        """Extract one record per listing from the currently loaded page.

        Returns:
            list[dict]: one dict per listing with the keys ``car_name``,
            ``car_value`` and ``information``. Empty when no listing
            matches the XPath.
        """
        el_list = self.webdriver.find_elements(
            By.XPATH,
            "//ul[@class='infos infos-card h-clearfix']/li[@class='info']",
        )
        temp_list = []
        for el in el_list:
            temp = {
                # The name is read from the span's "title" attribute,
                # the other two fields from the elements' visible text.
                "car_name": el.find_element(
                    By.XPATH, ".//div[1]/a/h2/span"
                ).get_attribute("title"),
                "car_value": el.find_element(By.XPATH, ".//div[2]/b").text,
                "information": el.find_element(
                    By.XPATH, ".//div[1]/div[2]"
                ).text,
            }
            print(temp)
            temp_list.append(temp)
        return temp_list

    def save_list(self, temp_list):
        """Output the records scraped from one page.

        Currently this only prints ``temp_list``; persistence (appending
        ``json.dumps(temp_list, ensure_ascii=False)`` to ``car.json``) is
        disabled.

        Args:
            temp_list: the list of record dicts returned by ``parse_data``.
        """
        print(temp_list)

    def run(self):
        """Load the start URL and process every result page in turn.

        The browser is always closed on exit, including when parsing a
        page raises.
        """
        try:
            self.webdriver.get(self.url)
            while True:
                temp_list = self.parse_data()
                self.save_list(temp_list)
                try:
                    next_page = self.webdriver.find_element(
                        By.XPATH, "//a[@class='next']"
                    )
                    # Scroll the window to y=10000 before clicking --
                    # presumably to bring the pagination link into view.
                    self.webdriver.execute_script("scrollTo(0, 10000)")
                    next_page.click()
                except WebDriverException:
                    # No "next" link, or it could not be clicked: treat the
                    # current page as the last one. Unlike a bare except,
                    # this lets KeyboardInterrupt/SystemExit propagate.
                    break
        finally:
            self.webdriver.quit()
# Script entry point: start the scraper only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    car = Car()
    car.run()
更多推荐
已为社区贡献3条内容
所有评论(0)