为什么每次翻页后获取到的 page_source 都是一样的?
import time
from selenium import webdriver
import re
import json
class Handle_webdriver(object):
    """Scrape 'golang' job listings from 51job.com with a Selenium-driven Chrome.

    Each 51job search-result page embeds its data as a JSON blob assigned to
    ``window.__SEARCH_RESULT__`` inside a <script> tag; ``handle_parse``
    extracts and decodes that blob instead of scraping the rendered DOM.
    """

    # Pre-compiled pattern for the embedded search-result JSON.
    # Dots are escaped so "window.__SEARCH_RESULT__" is matched literally
    # (the original pattern's bare '.' matched any character), and \s*
    # tolerates varying whitespace around '='.
    _RE_SEARCH_RESULT = re.compile(r"window\.__SEARCH_RESULT__\s*=\s*(.*?)</script>")

    def __init__(self):
        # Side effect: launches a visible Chrome window that lives until
        # handle_job() finishes (or the object is abandoned).
        self.driver = webdriver.Chrome()
        self.driver.maximize_window()

    def handle_job(self):
        """Search 51job for 'golang' and parse every result page in turn.

        Clicks the "next" pagination link until it no longer exists, then
        stops.  The browser is always quit via try/finally — in the original
        code ``driver.quit()`` was placed after ``while True`` and was
        unreachable, leaking the Chrome process.
        """
        try:
            self.driver.get(
                "https://search.51job.com/list/010000,000000,0000,00,9,99,%20,2,1.html"
            )
            input_keyword = 'golang'
            self.driver.find_element_by_id('keywordInput').send_keys(input_keyword)
            self.driver.find_element_by_class_name('p_but').click()
            while True:
                # Crude render wait; reading page_source before the new page
                # loads would yield the previous page's HTML again (the
                # "same page_source" symptom asked about).
                time.sleep(2)
                self.handle_parse(self.driver.page_source)
                # find_elements (plural) returns [] instead of raising when
                # the "next" link is absent, giving a clean loop exit on the
                # last result page.
                next_links = self.driver.find_elements_by_xpath(
                    "//li[@class='next']/a"
                )
                if not next_links:
                    break
                next_links[0].click()
        finally:
            self.driver.quit()

    def handle_parse(self, page_source):
        """Extract job postings from one search-result page.

        :param page_source: full HTML text of a 51job search-result page.
        :return: list of dicts with keys ``job_name``, ``company_name``,
                 ``company_address``, ``money``, ``date``; empty list when
                 the result blob is missing.  (Returning the list is new;
                 the original returned None implicitly.)
        """
        info_list = []
        job_data = self._RE_SEARCH_RESULT.search(page_source)
        if job_data:
            # ``or []`` guards against the key being absent or null in the
            # decoded JSON, which would otherwise crash the loop.
            job_items = json.loads(job_data.group(1)).get("engine_search_result") or []
            info_list = [
                {
                    'job_name': job_item['job_name'],
                    'company_name': job_item['company_name'],
                    'company_address': job_item['workarea_text'],
                    'money': job_item['providesalary_text'],
                    'date': job_item['updatedate'],
                }
                for job_item in job_items
            ]
        print(info_list)
        return info_list
if __name__ == "__main__":
    # Guard the script entry point: without this, merely importing the module
    # launched a Chrome browser and started scraping as a side effect.
    test_selenium = Handle_webdriver()
    test_selenium.handle_job()
正在回答 回答被采纳积分+1
- 参与学习 人
- 提交作业 107 份
- 解答问题 1672 个
Python最广为人知的应用就是爬虫了,有趣且酷的爬虫技能并没有那么遥远,本阶段带你学会利用主流Scrapy框架完成爬取招聘网站和二手车网站的项目实战。
了解课程
恭喜解决一个难题,获得1积分~
来为老师/同学的回答评分吧
0 星