python通过条码(商品名)查询商品信息(完整版)
前言:录入商品数据太痛苦了,但是又不能不录,所以这时候需要写一个 Python 脚本来完成,仅供参考。开始:可能会给你一个 Excel 表格,或者一个装有条码图片的文件夹……要你根据条码(商品名)找到对应的图片,保存起来。项目代码地址 1.通过百度爬取商品图片(免费接口) # 通过配合商品名通过百度找图片 def getBaiDu(shop_id, search_title): baidu_url ="http:
·
前言
手动录入商品数据太痛苦了,但是又不能不录,所以这时候需要写一个 Python 脚本来自动完成。以下代码仅供参考。
开始
你可能会拿到一个 Excel 表格,或者一个装有条码图片的文件夹……要求你根据条码(或商品名)找到对应的商品图片,并保存起来。
1.通过百度搜索爬取商品图片
# 通过配合商品名通过百度找图片
def getBaiDu(shop_id, search_title):
    """Search Baidu Images for a product name and download the results.

    The result page is fetched with the module-level ``headers`` and its text
    is handed to ``dowmloadPic`` together with ``shop_id``.

    Args:
        shop_id: Barcode of the product; forwarded unchanged to
            ``dowmloadPic``.
        search_title: Product name used as the search keyword.

    Raises:
        requests.exceptions.RequestException: If the search request fails or
            does not answer within 10 seconds.
    """
    # Local import so the block does not depend on the file's import header.
    from urllib.parse import quote

    # Percent-encode the keyword: characters such as '&', '#', '%' or '+' in
    # a product name would otherwise cut off or corrupt the ``word`` value.
    keyword = quote(str(search_title), safe='')
    baidu_url = "http://image.baidu.com/search/flip?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1460997499750_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word={}".format(keyword)
    # A timeout keeps one stalled connection from hanging a whole batch run.
    result = requests.get(baidu_url, headers=headers, timeout=10)
    dowmloadPic(result.text, shop_id)
def dowmloadPic(html, shop_id):
    """Download the first few images referenced by a Baidu result page.

    Image addresses are read from the ``"objURL"`` fields embedded in *html*.
    Each image that can be fetched is written as a ``.png`` file into the
    directory ``<path>/<shop_id>`` (``path`` is the module-level output root).

    Args:
        html: Text of the Baidu image search result page.
        shop_id: Barcode of the product; used as the sub-directory name.
    """
    # Local import so the block does not depend on the file's import header.
    import os

    # Maximum number of images to save.
    num_download = 5

    # Create the output directory. os.path.join gives the same result as the
    # former hard-coded '\\' on Windows and a valid path everywhere else.
    target_dir = os.path.join(path, shop_id)
    mkdir(target_dir)

    saved = 0
    for addr in re.findall('"objURL":"(.*?)"', html, re.S):
        # Stop once the limit is reached (the former ``< 0`` countdown check
        # let one extra image through).
        if saved >= num_download:
            break
        logger.info('现在正在爬取URL中的地址:' + str(addr))
        try:
            pic = requests.get(addr, timeout=10, headers=headers)
        except requests.exceptions.RequestException:
            # Covers connection errors and the timeouts that ``timeout=10``
            # can raise, so one bad address does not abort the loop.
            logger.info('您当前的URL出现错误!')
            continue
        localtime = time.strftime("%Y%m%d%H%M%S", time.localtime())
        # The timestamp only has one-second resolution; the running index
        # keeps images fetched within the same second from overwriting
        # each other.
        file_name = os.path.join(target_dir, '{}-{}.png'.format(localtime, saved))
        with open(file_name, 'wb') as fn:
            fn.write(pic.content)
        saved += 1
        logger.info(file_name)
2.根据(tiaoma.cnaidc.com)网站搜索商品信息
# 爬取 "tiaoma.cnaidc.com" 来查找商品信息
def requestT1(shop_id, max_retries=None):
    """Look up a barcode on tiaoma.cnaidc.com and save its product image.

    Each attempt downloads the site's captcha, recognises it with ddddocr and
    posts the barcode together with the recognised code. If the site answers
    that the captcha was wrong, the whole attempt is repeated. On a successful
    lookup that carries a ``code_img`` value, the image is saved as a ``.png``
    file in ``<path>/<shop_id>``.

    Args:
        shop_id: Barcode to look up; also used as the output directory name.
        max_retries: Maximum number of additional attempts after a rejected
            captcha. ``None`` (the default) retries until the captcha is
            accepted, as before, but without growing the call stack.

    Raises:
        requests.exceptions.RequestException: If the captcha or search
            request fails or does not answer within 10 seconds.
    """
    # Local imports so the block does not depend on the file's import header.
    import os
    from urllib.parse import urljoin

    url = 'http://tiaoma.cnaidc.com'
    # Build the recogniser once and reuse it across retries.
    ocr = ddddocr.DdddOcr()

    attempt = 0
    while True:
        # A fresh session per attempt, as in the recursive original.
        s = requests.session()

        # Fetch the captcha image. It is still written to disk so the last
        # captcha can be inspected, but the bytes are passed to the OCR
        # directly instead of being read back from the file.
        img_data = s.get(url + '/index/verify.html?time=', headers=headers, timeout=10).content
        with open('verification_code.png', 'wb') as v:
            v.write(img_data)

        # Solve the captcha.
        code = ocr.classification(img_data)
        logger.info('当前验证码为 ' + code)

        # Submit the lookup.
        data = {"code": shop_id, "verify": code}
        resp = s.post(url + '/index/search.html', headers=headers, data=data, timeout=10)
        # NOTE(review): parse_json is defined elsewhere; it is assumed to
        # return a dict with 'msg' and 'json' keys — confirm.
        resp_json = parse_json(resp.text)
        logger.info(resp_json)

        if resp_json['msg'] != '验证码错误':
            break
        attempt += 1
        if max_retries is not None and attempt > max_retries:
            logger.info('验证码重试次数已达上限, 放弃查询: ' + str(shop_id))
            return

    # Nothing to save unless the lookup succeeded and returned an image.
    if not (resp_json['msg'] == '查询成功' and resp_json['json'].get('code_img')):
        return

    # urljoin leaves absolute URLs untouched and resolves site-relative ones
    # (with or without a leading slash) against the site root.
    img_url = urljoin(url + '/', resp_json['json']['code_img'])
    try:
        shop_img_data = s.get(img_url, headers=headers, timeout=10).content
    except requests.exceptions.RequestException:
        # Covers connection errors and the timeouts ``timeout=10`` can raise.
        logger.info('访问图片URL出现错误!')
        return

    # Create the output directory and save the image.
    target_dir = os.path.join(path, shop_id)
    mkdir(target_dir)
    localtime = time.strftime("%Y%m%d%H%M%S", time.localtime())
    file_name = os.path.join(target_dir, str(localtime) + '.png')
    with open(file_name, 'wb') as v:
        v.write(shop_img_data)
    logger.info(file_name)
3.根据中国物品编码网站(search.anccnet.com)搜索商品信息
# 中国物品编码
def requestT2(shop_id, max_retries=None):
    """Look up a barcode on search.anccnet.com and save its product images.

    The search result page is fetched first. For every result entry the
    product detail page is opened, and every ``<img>`` inside its
    ``imageListDiv`` element is saved as a ``.png`` file in
    ``<path>/<shop_id>``. When the search page has no ``results`` element,
    the function waits 30 seconds and tries again.

    Args:
        shop_id: Barcode to look up; also used as the output directory name.
        max_retries: Maximum number of additional attempts when the search
            page has no ``results`` element. ``None`` (the default) retries
            until it does, as before, but without growing the call stack.

    Raises:
        requests.exceptions.RequestException: If the search request fails or
            does not answer within 10 seconds.
    """
    # Local imports so the block does not depend on the file's import header.
    import os
    from urllib.parse import urljoin

    t2_url = 'http://search.anccnet.com/searchResult2.aspx?keyword=' + shop_id

    # Work on a copy: writing these entries into the shared module-level
    # ``headers`` would send this site's Host/Cookie/Referer with every later
    # request made by the other scrapers in this file.
    site_headers = dict(headers)
    site_headers['Cookie'] = 'ASP.NET_SessionId=blgmvuf5s54mtz45si25rga2'  # must be obtained manually
    site_headers['Host'] = 'search.anccnet.com'
    site_headers['Referer'] = 'http://search.anccnet.com/searchResult2.aspx'

    s = requests.session()
    attempt = 0
    while True:
        resp = s.get(t2_url, headers=site_headers, timeout=10)
        soup = BeautifulSoup(resp.text, 'lxml')
        results = soup.find(attrs={"id": "results"})
        if results:
            break
        # The code treats a missing result container as the site refusing
        # the request (see the log message): back off, then try again.
        logger.info('当前访问过快啦~中国物品编码拒绝了我们的访问, 请等待30秒在访问')
        attempt += 1
        if max_retries is not None and attempt > max_retries:
            return
        time.sleep(30)

    target_dir = os.path.join(path, shop_id)
    saved = 0
    for div_tag in results.find_all('div', {'class': 'result'}):
        p_info = div_tag.find('dl', {'class': 'p-info'})
        dd = p_info.find_all('dd') if p_info else []
        a = dd[0].find('a') if dd else None
        # Skip entries that lack the parts read below (fourth <dd> for the
        # name, link in the first <dd>) instead of raising on them.
        if len(dd) < 4 or a is None or not a.get('href'):
            continue
        shop_name = dd[3].text

        # Open the product detail page to get the large images.
        try:
            shop_resp = s.get(urljoin(t2_url, a['href']), headers=site_headers, timeout=10)
        except requests.exceptions.RequestException:
            logger.info('访问商品详情页出现错误!')
            continue
        shop_soup = BeautifulSoup(shop_resp.text, 'lxml')
        results_img = shop_soup.find(attrs={"id": "imageListDiv"})
        if not results_img:
            logger.info('条码:{0} 商品名:{1} 当前抓取商品无图片!'.format(shop_id, shop_name))
            continue

        # Create the output directory.
        mkdir(target_dir)
        for img_tag in results_img.find_all('img'):
            src = img_tag.get('src')
            if not src:
                continue
            try:
                # NOTE(review): the pinned Host header is sent here too, as
                # in the original; confirm images are served from
                # search.anccnet.com.
                pic = requests.get(urljoin(shop_resp.url, src), timeout=10, headers=site_headers)
            except requests.exceptions.RequestException:
                # Covers connection errors and the timeouts ``timeout=10``
                # can raise, so one bad image does not abort the loop.
                logger.info('访问商品图片出现错误!')
                continue
            localtime = time.strftime("%Y%m%d%H%M%S", time.localtime())
            # The timestamp only has one-second resolution; the running
            # index keeps images saved within the same second from
            # overwriting each other.
            file_name = os.path.join(target_dir, '{}-{}.png'.format(localtime, saved))
            with open(file_name, 'wb') as fn:
                fn.write(pic.content)
            saved += 1
            logger.info(file_name)
更多推荐
已为社区贡献1条内容
所有评论(0)