为什么我获取到的信息是这样子的啊?是被反爬了吗?该怎么解决?
<!doctype html>
<!--[if lt IE 7]><html class="ie ie6"><![endif]-->
<!--[if IE 7]><html class="ie ie7"><![endif]-->
<!--[if IE 8]><html class="ie ie8"><![endif]-->
<!--[if IE 9]><html class="ie9"><![endif]-->
<!--[if (gt IE 9)|!(IE)]><!--><html class="standard"><!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<title>BOSS直聘</title>
<script>var staticPath="https://static.zhipin.com/zhipin-geek/chat/v365/"</script>
<link rel="shortcut icon" href="https://www.zhipin.com/favicon.ico">
<link rel="stylesheet" href="https://static.zhipin.com/v2/web/geek/css/prop-mall.css">
<script src="https://z.zhipin.com/H5/js/plugins/web-report.min-1.28.js"></script>
<script>try{performanceReport({action:"action_js_monitor",appKey:"ObsSRskiryhn60pf",isAjax:!(-1<navigator.userAgent.indexOf("Atlantis"))})}catch(t){}</script>
<link href="https://static.zhipin.com/zhipin-geek/chat/v365/static/css/vendor.4e4fcdef.css" rel="stylesheet" crossorigin="anonymous"><link href="https://static.zhipin.com/zhipin-geek/chat/v365/static/css/app.4e4fcdef.css" rel="stylesheet" crossorigin="anonymous"></head>
<body>
<script>!function(c,b,d,a){with(c[a]||(c[a]={}),c[a].config={pid:"h9614n96f5@7f36ca53ae12344",appType:"web",imgUrl:"https://arms-retcode.aliyuncs.com/r.png?",disableHook:!0,enableSPA:!0},b)with(body)with(insertBefore(createElement("script"),firstChild))setAttribute("crossorigin","",src=d)}(window,document,"https://retcode.alicdn.com/retcode/bl.js","__bl")</script>
<div id="app">
<div class="data-tips">
<div class="page-loading">
<span class="component-b"></span>
<span class="component-o"></span>
<span class="component-s1"></span>
<span class="component-s2"></span>
<p class="gray">加载中,请稍候</p>
</div>
</div>
</div>
<input type="hidden" id="page_key_name" value="">
<script src="https://static.zhipin.com/library/js/lib/jquery-1.12.2.min.js" crossorigin="anonymous"></script>
<script src="https://static.zhipin.com/v2/web/common/mqtt-v2.1.min.js" crossorigin="anonymous"></script>
<script>function get_share_datas_from_html_inapp(){var e="shdefault",t=getQueryString("shid"),n=getQueryString("sid");t?e=t:n&&(e=n);var r="pkdefault",a="ppdefault",u=document.getElementById("page_key_name");if(u){var i=u.value;if(i){var l=i.split("|");1==l.length?r=l[0]:2<=l.length&&(r=l[0],a=l[1])}}var g=new Array;return g.shid=e,g.pk=r,g.pp=a,g}function getQueryString(e){var t=new RegExp("(^|&)"+e+"=([^&]*)(&|$)"),n=window.location.search.substr(1).match(t);return null!=n?unescape(n[2]):null}</script>
<script>var _T=_T||[];!function(){var t=document.createElement("script");t.src="https://static.zhipin.com/library/js/analytics/ka.zhipin.min.js";var e=document.getElementsByTagName("script")[0];e.parentNode.insertBefore(t,e)}()</script>
<script src="https://static.zhipin.com/library/js/lib/vue-core-v1.0.0.min.js"></script>
<script src="https://static.zhipin.com/library/js/utils/lottie_light-v5.9.4.min.js"></script>
<script src="https://static.zhipin.com/zhipin-geek/chat/v365/static/js/vendor.4e4fcdef.js" crossorigin="anonymous"></script><script src="https://static.zhipin.com/zhipin-geek/chat/v365/static/js/app.4e4fcdef.js" crossorigin="anonymous"></script><script src="https://static.zhipin.com/zhipin-geek/chat/v365/static/js/boss-ui.4e4fcdef.js" crossorigin="anonymous"></script></body>
</html>
20
收起
正在回答 回答被采纳积分+1
1回答
好帮手慕小猿
2023-10-06 15:40:35
同学,你好!问题出在请求的链接不对。BOSS直聘不适合直接用 spider 爬取页面:职位搜索结果是通过接口加载的,直接请求页面只能拿到上面这种“加载中,请稍候”的空壳 HTML。应当直接请求返回数据的接口,链接如下:

找对 URL 后,请求时添加 headers 即可请求成功,参考代码如下:
# Fetch the BOSS Zhipin job-search results from the JSON endpoint instead of
# scraping the HTML page, then parse the response body into `a`.
import requests,json

# Request headers sent with the call: a desktop Chrome User-Agent and a Cookie.
# NOTE(review): the cookie values (e.g. __zp_stoken__) look session-specific;
# presumably they must be replaced with ones from your own browser session
# — confirm.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.289 Safari/537.36',
    'Cookie': "lastCity=101030100; __g=-; __l=l=%2Fwww.zhipin.com%2Ftianjin%2F&r=&g=&s=3&friend_source=0; wd_guid=b1063da9-69c1-4dc4-9634-03917ed74432; historyState=state; _bl_uid=gjlv6n60ety9eIpF23pCqX51LLRC; __c=1696576196; __a=70432027.1696576196..1696576196.4.1.4.4; __zp_stoken__=a086eSGQtC0FXKF54dQZZaU1GE34nN0hnRDx5HF9RNxpgaWw%2FXlYbN00qU3llBCg0X2dsWCFYZH4PYW8IUEdcIGB2C35qBhI7ZD1JFzUjRisqWkMqcSYAS0RDMgYaH0pxRwMGUlBDRAAcagMsUwdyPyotKBl1ZzRPI0VDHyJ4T0QoPR5HJXUzHCRWEQxYJngONX1sWGAGFg%3D%3D",
}

# Query-string parameters, passed as a dict so that requests builds the URL.
# The original hand-written URL contained "partTime=°ree=": the "&deg" of
# "&degree=" had been decoded as an HTML entity, which dropped the `degree`
# parameter and corrupted `partTime`. Empty strings are still sent as "key=".
params = {
    'scene': '1',
    'query': 'python',
    'city': '101030100',
    'experience': '',
    'payType': '',
    'partTime': '',
    'degree': '',
    'industry': '',
    'scale': '',
    'stage': '',
    'position': '',
    'jobType': '',
    'salary': '',
    'multiBusinessDistrict': '',
    'multiSubway': '',
    'page': '1',
    'pageSize': '30',
}

# A timeout keeps the script from hanging forever if the server never answers.
response = requests.get(
    "https://www.zhipin.com/wapi/zpgeek/search/joblist.json",
    params=params,
    headers=headers,
    timeout=10,
)

# Parse the JSON body; the following line reads a['zpData']['jobList'].
a = json.loads(response.text)
print(a['zpData']['jobList'])

返回数据如下:

祝学习愉快~
恭喜解决一个难题,获得1积分~
来为老师/同学的回答评分吧
0 星