好帮手乔木
2020-03-20 14:29:41
Hello,

The site has recompiled its JS file; here is updated code that works against the current version:
import json
import requests
import execjs
import re
from lxml import etree
from handle_mongo import mongo  # project-local course helper (not used below)
url = 'https://www.guazi.com/anji/buy/'
header = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    "Host": "www.guazi.com",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36",
}
response = requests.get(url, headers=header)
response.encoding = 'utf-8'
# the anti-crawler page embeds a call like anti('<string>','<key>')
if '正在打开中,请稍后...' in response.text:
    value_pattern = re.compile(r"anti\('(.*?)','(.*?)'\)")
    string = value_pattern.search(response.text).group(1)
    key = value_pattern.search(response.text).group(2)
    # read the JS file saved from the site
    with open('guazi.js', 'r', encoding='utf-8') as f:
        file_js = f.read()
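    # Note (assumption): execjs only wraps an external JavaScript runtime
    # (e.g. Node.js) that it finds on the machine; if no runtime is
    # available, the compile call below fails, so install one first.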
    js = execjs.compile(file_js)
    js_return = js.call('anti', string, key)
    cookie_value = 'antipas=' + js_return
    header['Cookie'] = cookie_value
    response = requests.get(url, headers=header)
# print(response.text)
guazi_html = etree.HTML(response.text)
# the city data sits in the page's third <script> tag as JS object literals
script_js = guazi_html.xpath("//script[3]/text()")[0]
city_search = re.compile(r'({.*?});')
city = city_search.findall(script_js)
# cityLeft holds each city's Chinese and English names
cityOne = json.loads(city[0])
cityTwo = json.loads(city[1])
A_M = [chr(i) for i in range(65, 78)]  # initials 'A'..'M'
N_Z = [chr(i) for i in range(78, 91)]  # initials 'N'..'Z'
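# Assumption from the page structure: the two JSON blobs split the city
# dictionary by initial letter (cityOne covers A-M, cityTwo covers N-Z),
# so each half is walked with its own letter range below.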
all_city = []
for i in A_M:
    # cities sharing the same initial letter
    each_list1 = cityOne.get(i)
    if each_list1:
        all_city.append(each_list1)
for i in N_Z:
    each_list2 = cityTwo.get(i)
    if each_list2:
        all_city.append(each_list2)
brand_list = guazi_html.xpath('//div[@class="dd-all clearfix js-brand js-option-hid-info"]//a')
info_list = []
brand_url = re.compile(r'/.*?/(.*?)/#bread')
# all cities
print(all_city)
for item in brand_list:
    bl = brand_url.search(item.xpath('./@href')[0]).group(1)
    # all brand names
    print(bl)

If this solves your problem, please accept the answer. Happy learning ^_^
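P.S. handle_mongo is a project-local module, so the snippet above won't run standalone without it. Here is a minimal stand-in, assuming pymongo and a MongoDB server on localhost; the name mongo comes from the import, but the database/collection names and the insert_item method are invented for this sketch:

import pymongo

class mongo:
    # hypothetical stand-in for the course's handle_mongo helper
    def __init__(self):
        # assumes a MongoDB server on the default local host/port
        client = pymongo.MongoClient('mongodb://127.0.0.1:27017')
        # database and collection names invented for this sketch
        self.collection = client['guazi']['info']

    def insert_item(self, item):
        # store one scraped record (a plain dict)
        self.collection.insert_one(item)

With something like that in place, you could replace the print(bl) calls with mongo().insert_item({'brand': bl}) to persist the results instead of printing them.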