"""Template for scraping data from a web page; for reference and study only."""

import pandas as pd
import requests
from lxml import etree
# Listing page to scrape.
url = 'https://www.che168.com/guangxi'

# Browser-like User-Agent sent with the request as an anti-scraping workaround.
header = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36'}

# XPath selecting one <li> per car listing.
# NOTE(review): the id ends in "Coree", which looks like a possible typo;
# confirm against the live page markup before relying on it.
LISTING_XPATH = '//*[@id="goodStartSolrQuotePriceCoree"]/ul/li'

# Column headers of the output table: car model, description, price.
COLUMNS = ['车型', '信息', '车价']

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def fetch_html(page_url, headers, timeout=REQUEST_TIMEOUT):
    """Download *page_url* and return the response body as text.

    Args:
        page_url: Address of the page to download.
        headers: HTTP headers to send with the request.
        timeout: Seconds to wait for the server before aborting.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status.
    """
    resp = requests.get(page_url, headers=headers, timeout=timeout)
    # Fail loudly on an error page rather than parsing it as if it were data.
    resp.raise_for_status()
    # NOTE(review): resp.text relies on the encoding requests detects; if the
    # saved text comes out garbled, check resp.encoding for this site.
    return resp.text


def parse_listings(html_text):
    """Extract ``(car_type, car_message, car_price)`` rows from page HTML.

    One row is produced per listing so the three fields always stay aligned.
    A field that is absent from a listing becomes an empty string; a listing
    with none of the three fields is skipped.
    """
    tree = etree.HTML(html_text)
    if tree is None:
        return []

    rows = []
    for item in tree.xpath(LISTING_XPATH):
        # Car model.
        type_nodes = item.xpath('./a/div[3]/h4/text()')
        car_type = type_nodes[0] if type_nodes else ''
        # Car description.
        message_nodes = item.xpath('./a/div[3]/p/text()')
        car_message = message_nodes[0] if message_nodes else ''
        # Car price: the text is spread over nested nodes, so join the pieces.
        car_price = ''.join(item.xpath('./a/div[3]/div/span//text()'))

        if car_type or car_message or car_price:
            rows.append((car_type, car_message, car_price))
    return rows


def build_table(rows):
    """Return a DataFrame with one line per ``(type, message, price)`` row."""
    return pd.DataFrame(list(rows), columns=COLUMNS)


def save_table(table):
    """Write *table* to an Excel file and a CSV file in the working directory."""
    table.to_excel('广西二手车信息.xlsx')  # save the scraped data as Excel
    table.to_csv('广西二手车信息.csv')  # save the scraped data as CSV


def main():
    """Fetch the listing page, parse it, and save the result to disk."""
    html_text = fetch_html(url, header)
    print(html_text)  # dump the raw page, useful when adjusting the XPaths
    data = build_table(parse_listings(html_text))
    save_table(data)


if __name__ == "__main__":
    main()