1.首先要排查淘宝获取商品数据的请求方法
在浏览器中打开淘宝网后,按 F12 打开开发者工具,并切换到“网络”面板(本文使用的是火狐浏览器)。
在这里插入图片描述

2.爬取数据首先需要获取如下请求头参数
cookie
Referer
在这里插入图片描述

3.获取到上述基本信息后,替换如下 Java 代码中的对应位置,代码如下:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
 * Minimal example that sends a GET request to a Taobao search URL with browser-like
 * headers and prints the HTTP status and the raw response body to standard output.
 *
 * <p>The {@code Cookie} and {@code Referer} values are copied from a browser session and
 * must be replaced with values from your own session before running.
 */
public class taobao {

	/** Search request URL; the same value is also sent as the {@code Referer} header. */
	private static final String SEARCH_URL = "https://s.taobao.com/search?q=%E9%A5%BC%E5%B9%B2&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306";

	/**
	 * Session cookie copied from the browser.
	 *
	 * <p>NOTE(review): this is a credential and should not live in source control; it is
	 * also only a placeholder (part of the value is elided) — substitute your own cookie,
	 * ideally loaded from configuration or an environment variable.
	 */
	private static final String COOKIE = "isg=BKSkHbPtmFr5u-wWzetE4qH9dqKWPcinA1Zw6L7EtW8yaUcz4Ek0N7OLKUHxqgD_; l=eBIUYKLVjRUD9zzEBO5Cnurza77TzIOV1kPzaNbMiInca6NC1FZm8NCQO6rkRdtfgtfXHFxyyIVnedeDy64dg2HvCbH_7qCkixJ6-; tfstk=cBiVBPgX1nK2IgrLmoZN1SAldg2Aa_DmIgyQo40dN8ZvHBUu_sYR68XEN8y1WhUc.; cna=RoBIFSzrQFoCAdOQevvZwOn8; miid=518745632110493350; thw=cn; UM_distinctid=17608fd033d2db-057cbf7fac634f8-4c3f2779-1fa400-17608fd033e412; t=d31becaca80987bb75837c77484d2d98; _m_h5_tk=e7afd812cbf21125ef802b63c8df94c6_1615710111436; _m_h5_tk_enc=fd3a73ffa66e2b2242…e390a0d829; existShop=MTYxNTcwMDg4Nw%3D%3D; uc4=id4=0%40U2LDagxTFgkHppc9r9oGtNL7iNUp&nk4=0%40oijaZ6LWXvj5TA1rnr%2B24BPd4g%3D%3D; tracknick=%5Cu7F8E%5Cu4E3E%5Cu7684%5Cu4E8C; _cc_=Vq8l%2BKCLiw%3D%3D; enc=ZxiYLYwbwjFjE%2F9z8Ry2Ak9smH38MLEK%2Fw6nAKoUZE5RDeYeor0cyWVYpncyvKZHSGpS3MKKCsx6hyn%2FNoJqfg%3D%3D; JSESSIONID=250725228E4A009D0FBF207D59D02264; hng=CN%7Czh-CN%7CCNY%7C156; mt=ci=0_1; uc1=pas=0&cookie16=VT5L2FSpNgq6fDudInPRgavC%2BQ%3D%3D&cookie14=Uoe1hx5b4Yrf5A%3D%3D&cookie21=Vq8l%2BKCLjA%2Bl&existShop=false";

	/** Maximum time to wait for the TCP/TLS connection to be established. */
	private static final int CONNECT_TIMEOUT_MS = 10000;

	/** Maximum time to wait for data once connected, so a stalled server cannot hang the program. */
	private static final int READ_TIMEOUT_MS = 30000;

	/** Size of the chunk buffer used while draining the response stream. */
	private static final int BUFFER_SIZE = 8192;

	/**
	 * Sends the search request, prints the HTTP status code and then the whole response body.
	 * I/O failures (including error statuses reported by {@code getInputStream()}) are
	 * reported on standard error instead of being propagated.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		HttpURLConnection connection = null;
		try {
			connection = (HttpURLConnection) new URL(SEARCH_URL).openConnection();
			connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
			connection.setReadTimeout(READ_TIMEOUT_MS);
			connection.setRequestProperty("accept", "*/*");
			connection.setRequestProperty("connection", "Keep-Alive");
			connection.setRequestProperty("Referer", SEARCH_URL);
			connection.setRequestProperty("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0");
			connection.setRequestProperty("Cookie", COOKIE);

			// Open the actual connection and report the status line.
			connection.connect();
			System.out.println("请求状态:"+connection.getResponseCode());

			// Decode with the charset announced by the server rather than the platform
			// default, which would garble non-ASCII text on non-UTF-8 platforms.
			Charset charset = charsetOf(connection.getContentType());
			String jsonString;
			try (InputStream is = connection.getInputStream()) {
				jsonString = new String(readFully(is), charset);
			}
			System.out.println("jsonString:"+jsonString);
			// TODO: extract the product data from the page and convert it to JSON.
		} catch (IOException e) {
			System.err.println("请求失败:"+e);
		} finally {
			if (connection != null) {
				connection.disconnect();
			}
		}
	}

	/**
	 * Reads the given stream until end-of-stream and returns everything that was read.
	 * The caller remains responsible for closing {@code in}.
	 *
	 * @param in stream to drain
	 * @return all bytes read from {@code in}
	 * @throws IOException if reading fails
	 */
	private static byte[] readFully(InputStream in) throws IOException {
		ByteArrayOutputStream collected = new ByteArrayOutputStream();
		byte[] chunk = new byte[BUFFER_SIZE];
		int read;
		while ((read = in.read(chunk)) != -1) {
			collected.write(chunk, 0, read);
		}
		return collected.toByteArray();
	}

	/**
	 * Extracts the {@code charset} parameter from a {@code Content-Type} header value.
	 *
	 * @param contentType header value, may be {@code null}
	 * @return the declared charset, or UTF-8 when none is declared or the declared name
	 *         is malformed or unsupported
	 */
	private static Charset charsetOf(String contentType) {
		final String key = "charset=";
		if (contentType != null) {
			for (String part : contentType.split(";")) {
				String param = part.trim();
				if (param.regionMatches(true, 0, key, 0, key.length())) {
					String name = param.substring(key.length()).replace("\"", "").trim();
					try {
						return Charset.forName(name);
					} catch (IllegalArgumentException ignored) {
						// Malformed or unsupported charset name: fall back to UTF-8 below.
						break;
					}
				}
			}
		}
		return StandardCharsets.UTF_8;
	}
}

4.执行结果如下。获取到的是整个页面的数据,商品数据需要自己从中截取出来再做处理。
在这里插入图片描述

Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐