Python 通过条码(商品名)查询商品信息(完整版)

前言：录入商品数据太痛苦了，但是又不能不录，所以这时候需要写一个 Python 脚本来完成，仅供参考。开始：可能会给你一个 Excel 表格，或者一个装有条码图片的文件夹……要你根据条码(商品名)找到对应的图片，保存起来。项目代码地址。1. 通过百度爬取商品图片(免费接口)：# 根据商品名通过百度搜索图片 def getBaiDu(shop_id, search_title): baidu_url = "http:…

An_s

3929人浏览 · 2021-10-14 16:03:06

An_s · 2021-10-14 16:03:06 发布

前言

录入商品数据太痛苦了，但是又不能不录，所以这时候需要写一个 Python 脚本来完成，仅供参考。

开始

可能会给你一个 Excel 表格，或者一个装有条码图片的文件夹……要你根据条码(商品名)找到对应的图片，并保存起来。

项目代码地址

1.通过百度搜索爬取商品图片

# 根据商品名通过百度搜索图片
def getBaiDu(shop_id, search_title):
    """Search Baidu Images for a product name and hand the result page to ``dowmloadPic``.

    Args:
        shop_id: Identifier forwarded unchanged to ``dowmloadPic``.
        search_title: Search keyword (the product name). It is converted to
            ``str`` and percent-encoded before being placed in the URL.

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests.get``
            when the search request fails or exceeds the 10 second timeout.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # Percent-encode the keyword: characters such as '&', '#', '+' or spaces in
    # a product name would otherwise truncate or corrupt the query string.
    keyword = quote(str(search_title), safe='')
    baidu_url = "http://image.baidu.com/search/flip?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1460997499750_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word={}".format(keyword)
    # Same 10 second timeout the image downloads use, so a stalled connection
    # cannot hang the whole run.
    result = requests.get(baidu_url, headers=headers, timeout=10)
    dowmloadPic(result.text, shop_id)

def dowmloadPic(html, shop_id):
    """Save up to five images referenced by ``"objURL"`` entries in ``html``.

    Images are written to ``path\\<shop_id>\\<timestamp>_<n>.png`` where ``n`` is
    the zero-based count of images saved so far in this call.

    Args:
        html: Text of a search result page; every ``"objURL":"..."`` value is
            treated as a candidate image URL.
        shop_id: Name of the sub-directory (under the module-level ``path``)
            that receives the files. Must be a ``str``.
    """
    # Maximum number of images to save per call.
    max_download = 5
    target_dir = path + '\\' + shop_id
    # Create the target directory (``mkdir`` is a helper defined elsewhere).
    mkdir(target_dir)

    saved = 0
    for addr in re.findall('"objURL":"(.*?)"', html, re.S):
        # The original tested ``< 0`` on a countdown from 5 and therefore
        # saved six files; stop once exactly ``max_download`` are written.
        if saved >= max_download:
            break
        logger.info('现在正在爬取URL中的地址：' + str(addr))
        try:
            pic = requests.get(addr, timeout=10, headers=headers)
            # Reject 4xx/5xx replies so error pages are not stored as .png.
            pic.raise_for_status()
        except requests.exceptions.RequestException:
            # RequestException also covers read timeouts and HTTP errors,
            # which ConnectionError alone does not.
            logger.info('您当前的URL出现错误！')
            continue

        localtime = time.strftime("%Y%m%d%H%M%S", time.localtime())
        # The timestamp only has one-second resolution; the counter suffix
        # stops images fetched within the same second overwriting each other.
        file_name = target_dir + '\\' + localtime + '_' + str(saved) + '.png'
        with open(file_name, 'wb') as fn:
            fn.write(pic.content)

        # NOTE: the original carried a commented-out drop_wartermark(...) call
        # here; watermark removal is still disabled.
        saved += 1
        logger.info(file_name)

2.根据(tiaoma.cnaidc.com)网站搜索商品信息

# 爬取 "tiaoma.cnaidc.com" 来查找商品信息
def requestT1(shop_id, max_retries=5):
    """Look up a barcode on tiaoma.cnaidc.com and save the product image, if any.

    Each attempt fetches a captcha, recognises it with ddddocr, posts the
    search form and, when the reply reports success and carries a
    ``code_img`` value, downloads that image to
    ``path\\<shop_id>\\<timestamp>.png``. Only a reply whose ``msg`` is
    '验证码错误' (wrong captcha) triggers another attempt.

    Args:
        shop_id: Barcode to query; also used as the output sub-directory name.
        max_retries: How many additional attempts are made after a wrong
            captcha. The original recursed without limit.

    Raises:
        requests.exceptions.RequestException: Propagated when the captcha or
            search request fails or exceeds the 10 second timeout.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    url = 'http://tiaoma.cnaidc.com'
    # Build the OCR engine once instead of once per captcha retry.
    ocr = ddddocr.DdddOcr()

    for _attempt in range(max_retries + 1):
        # A fresh session per attempt mirrors the original recursion, where
        # every retry started without cookies from the previous one.
        with requests.session() as s:
            # Fetch the captcha image.
            img_data = s.get(url + '/index/verify.html?time=', headers=headers, timeout=10).content
            # Still written to disk, as the original did; the bytes are fed
            # to the OCR directly rather than being read back from the file.
            with open('verification_code.png', 'wb') as v:
                v.write(img_data)

            # Recognise the captcha.
            code = ocr.classification(img_data)
            logger.info('当前验证码为 ' + code)

            # Submit the query form.
            data = {"code": shop_id, "verify": code}
            resp = s.post(url + '/index/search.html', headers=headers, data=data, timeout=10)
            resp_json = parse_json(resp.text)
            logger.info(resp_json)

            if resp_json['msg'] == '验证码错误':
                # Wrong captcha: try again with a new one.
                continue

            if resp_json['msg'] == '查询成功' and resp_json['json'].get('code_img'):
                # urljoin leaves absolute URLs untouched and resolves relative
                # ones against the site root, with or without a leading '/'.
                img_url = urljoin(url + '/', resp_json['json']['code_img'])
                try:
                    img_resp = s.get(img_url, headers=headers, timeout=10)
                    # Reject 4xx/5xx replies so error pages are not stored as .png.
                    img_resp.raise_for_status()
                except requests.exceptions.RequestException:
                    # Also covers read timeouts and HTTP errors.
                    logger.info('访问图片URL出现错误！')
                    return

                target_dir = path + '\\' + shop_id
                # Create the target directory (``mkdir`` is defined elsewhere).
                mkdir(target_dir)
                localtime = time.strftime("%Y%m%d%H%M%S", time.localtime())
                file_name = target_dir + '\\' + str(localtime) + '.png'
                with open(file_name, 'wb') as v:
                    v.write(img_resp.content)
                logger.info(file_name)
            # Any reply other than a wrong captcha ends the lookup.
            return

    logger.info('验证码多次识别错误，放弃查询条码：' + str(shop_id))

3.根据中国物品编码搜索商品信息

# 中国物品编码
def _t2_save_result_images(session, results, shop_id, req_headers):
    """Follow each search result to its detail page and save the images found there."""
    target_dir = path + '\\' + shop_id
    saved = 0
    for div_tag in results.find_all('div', {'class': 'result'}):
        p_info = div_tag.find('dl', {'class': 'p-info'})
        dd = p_info.find_all('dd') if p_info else []
        link = dd[0].find('a') if dd else None
        # Skip entries that do not have the expected markup instead of
        # failing on None / a missing index.
        if link is None or not link.get('href'):
            continue
        shop_name = dd[3].text if len(dd) > 3 else ''

        # Open the product detail page, which holds the large images.
        try:
            shop_resp = session.get(link['href'], headers=req_headers, timeout=10)
        except requests.exceptions.RequestException:
            logger.info('访问商品图片出现错误！')
            continue
        shop_soup = BeautifulSoup(shop_resp.text, 'lxml')
        results_img = shop_soup.find(attrs={"id": "imageListDiv"})
        if not results_img:
            logger.info('条码：{0} 商品名：{1} 当前抓取商品无图片！'.format(shop_id, shop_name))
            continue

        # Create the target directory (``mkdir`` is defined elsewhere).
        mkdir(target_dir)
        for img_tag in results_img.find_all('img'):
            src = img_tag.get('src')
            if not src:
                continue
            try:
                pic = requests.get(src, timeout=10, headers=req_headers)
                # Reject 4xx/5xx replies so error pages are not stored as .png.
                pic.raise_for_status()
            except requests.exceptions.RequestException:
                # Also covers read timeouts and HTTP errors.
                logger.info('访问商品图片出现错误！')
                continue
            localtime = time.strftime("%Y%m%d%H%M%S", time.localtime())
            # The counter suffix keeps images saved within the same second
            # from overwriting each other.
            file_name = target_dir + '\\' + localtime + '_' + str(saved) + '.png'
            with open(file_name, 'wb') as fn:
                fn.write(pic.content)
            saved += 1
            logger.info(file_name)


def requestT2(shop_id, max_retries=3):
    """Look up a barcode on search.anccnet.com and save the product images.

    Fetches the search result page for ``shop_id``; for every result it opens
    the linked detail page and saves each ``<img>`` inside the element with
    id ``imageListDiv`` to ``path\\<shop_id>\\<timestamp>_<n>.png``. When the
    result page has no element with id ``results`` the call waits 30 seconds
    and tries again.

    Args:
        shop_id: Barcode to query (``str``); also the output sub-directory name.
        max_retries: How many additional attempts are made when the result
            container is missing. The original recursed without limit.

    Raises:
        requests.exceptions.RequestException: Propagated when the search
            request fails or exceeds the 10 second timeout.
    """
    # Work on a copy. The original wrote Cookie/Host/Referer into the shared
    # module-level ``headers`` dict, so every later request to another site
    # was sent with ``Host: search.anccnet.com``.
    req_headers = dict(headers)
    req_headers['Cookie'] = 'ASP.NET_SessionId=blgmvuf5s54mtz45si25rga2'  # must be obtained manually
    req_headers['Host'] = 'search.anccnet.com'
    req_headers['Referer'] = 'http://search.anccnet.com/searchResult2.aspx'
    # NOTE(review): as in the original, the same headers (including Host) are
    # sent to the detail-page and image URLs; confirm those live on this host.

    t2_url = 'http://search.anccnet.com/searchResult2.aspx?keyword=' + shop_id

    for attempt in range(max_retries + 1):
        # A fresh session per attempt mirrors the original recursion.
        with requests.session() as s:
            resp = s.get(t2_url, headers=req_headers, timeout=10)
            soup = BeautifulSoup(resp.text, 'lxml')
            results = soup.find(attrs={"id": "results"})
            if results:
                _t2_save_result_images(s, results, shop_id, req_headers)
                return

        logger.info('当前访问过快啦~中国物品编码拒绝了我们的访问, 请等待30秒在访问')
        if attempt < max_retries:
            time.sleep(30)

    logger.info('中国物品编码多次拒绝访问，放弃查询条码：' + str(shop_id))

腾讯云开发者社区

腾讯云面向开发者汇聚海量精品云计算使用和开发经验，营造开放的云计算技术生态圈。

更多推荐

自动化提示词生成工具盘点

腾讯云开发者社区

AI 浪潮下的锚与帆：工程师文化的变与不变 | 架构师夜生活

腾讯云开发者社区

腾讯云架构师技术沙龙 · 长沙站圆满落幕，共话AI驱动下的技术架构与前沿应用

人工智能已成为推动技术创新与产业变革的重要引擎，开发者正身处一场前所未有的技术变革之中。通过本次腾讯云架构师技术沙龙，各位专家深入分享前沿技术洞察，探讨 AI 落地的应用路径与实践经验，为架构师的职业发展指明方向。腾讯云架构师长沙同盟和腾讯云架构师技术同盟长沙地区理事会正式成立。未来，腾讯云架构师长沙同盟将凝心聚力，打造属于本地架构师的学习与成长的家园，助力中国架构的蓬勃发展。未来已来，让我们携手