国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 學院 > 開發設計 > 正文

python3.4偽裝成瀏覽器獲取頁面信息失敗

2019-11-14 17:09:20
字體:
供稿:網友

最近學了下網絡爬蟲,打算從一個網站上提取點東西,自己練練手,剛開始還從這個網站上取了正確的html,後來百般嘗試還是不能取正確的html,希望能得到大家的幫助~

我剛開始的代碼是:

1     url="http://www.karger.com/Collections/Hospital";2     data = urllib.request.urlopen(url).read();3     data=data.decode('gb2312');4     data=BeautifulSoup(data);5     print(data);

後來改成下面這樣:

 1 url="http://www.karger.com/Collections/Hospital"; 2  3 headers = [('Host','www.karger.com'), 4             ('Connection', 'keep-alive'),  5             ('Cache-Control', 'max-age=0'), 6              ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'), 7               ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0'),  8               ('Accept-Encoding','gzip, deflate'), 9                ('Accept-Language', 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3'), 10                ('If-None-Match', '90101f995236651aa74454922de2ad74'), 11                ('Referer','"http://www.karger.com/Collections/Hospital"'),12                ('If-Modified-Since', 'Thu, 01 Jan 1970 00:00:00 GMT')] 13 opener = urllib.request.build_opener() 14 opener.addheaders = headers 15 data = opener.open(url).read();16 print(data)  

得到的執行結果都是下面這樣:

b'<html>/r/n<head>/r/n<META NAME="robots" CONTENT="noindex,nofollow">/r/n<script>/r/n(function() {  function getsessionCookies() {   cookieArray = new Array();   var cName = /^//s?incap_ses_/;   var c = document.cookie.split(";");   for (var i = 0; i < c.length; i++) {    key = c[i].substr(0, c[i].indexOf("="));    value = c[i].substr(c[i].indexOf("=") + 1, c[i].length);    if (cName.test(key)) {     cookieArray[cookieArray.length] = value    }   }   return cookieArray  }  function setIncapCookie(vArray) {   try {    cookies = getSessionCookies();    digests = new Array(cookies.length);    for (var i = 0; i < cookies.length; i++) {     digests[i] = simpleDigest((vArray) + cookies[i])    }    res = vArray + ",digest=" + (digests.join())   } catch (e) {    res = vArray + ",digest=" + (encodeURIComponent(e.toString()))   }   createCookie("___utmvc", res, 20)  }  function simpleDigest(mystr) {   var res = 0;   for (var i = 0; i < mystr.length; i++) {    res += mystr.charCodeAt(i)   }   return res  }  function createCookie(name, value, seconds) {   if (seconds) {    var date = new Date();    date.setTime(date.getTime() + (seconds * 1000));    var expires = "; expires=" + date.toGMTString()   } else {    var expires = ""   }   document.cookie = name + "=" + value + expires + "; path=/"  }  function test(o) {   var res = "";   var vArray = new Array();   for (var j = 0; j < o.length; j++) {    var test = o[j][0]    switch (o[j][1]) {    case "exists_boolean":     try { /t if(typeof(eval(test)) != "undefined"){ /t/tvArray[vArray.length] = encodeURIComponent(test + "=true") /t } /t else{ /t/tvArray[vArray.length] = encodeURIComponent(test + "=false") /t }     } catch (e) {      vArray[vArray.length] = encodeURIComponent(test + "=false")     }     break;    case "exists":     try {      vArray[vArray.length] = encodeURIComponent(test + "=" + typeof(eval(test)))     } catch (e) {      vArray[vArray.length] = encodeURIComponent(test + "=" + e)     }     break;    case "value": 
    try {      vArray[vArray.length] = encodeURIComponent(test + "=" + eval(test).toString())     } catch (e) {      vArray[vArray.length] = encodeURIComponent(test + "=" + e)     }     break;     case "plugins":     try{         p=navigator.plugins         pres=""         for (a in p){pres+=(p[a][/'description/']+" ").substring(0,20)}         vArray[vArray.length] = encodeURIComponent("plugins=" + pres);         }     catch(e){         vArray[vArray.length] = encodeURIComponent("plugins=" +e);         } /tbreak;      case "plugin":     try {      a = navigator.plugins;      for (i in a) {       f = a[i]["filename"].split(".");       if (f.length == 2) {        vArray[vArray.length] = encodeURIComponent("plugin=" + f[1]);        break       }      }     } catch (e) {      vArray[vArray.length] = encodeURIComponent("plugin=" + e)     }     break    }   }   vArray = vArray.join();   return vArray  }  var o = [   ["navigator", "exists_boolean"],   ["navigator.vendor", "value"],   ["Opera", "exists_boolean"],   ["ActiveXObject", "exists_boolean"],   ["navigator.appName", "value"],   ["platform", "plugin"],   ["webkitURL", "exists_boolean"],   ["navigator.plugins.length==0", "value"],   ["_phantom", "exists_boolean"] ];  try {   setIncapCookie(test(o));   document.createElement("img").src = "/_Incapsula_Resource?SWKMTFSR=1&e=" + Math.random()  } catch (e) {   img = document.createElement("img");   img.src = "/_Incapsula_Resource?SWKMTFSR=1&e=" + e  } })();/r/n</script>/r/n<script>/r/n(function() { /r/nvar z="";var bfor (var i=0;i<b.length;i+=2){z=z+parseInt(b.substring(i, i+2), 16)+",";}z = z.substring(0,z.length-1); eval(eval(/'String.fromCharCode(/'+z+/')/'));})();/r/n</script></head>/r/n<body>/r/n<iframe style="display:none;visibility:hidden;" src="http://content.incapsula.com/jsTest.html" id="gaIframe"></iframe>/r/n</body></html>'

希望大家能夠幫我找到問題,謝謝大家~


發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 揭东县| 泰宁县| 西华县| 来宾市| 沙坪坝区| 玉树县| 南江县| 涞源县| 乐陵市| 桃园市| 泗水县| 大埔区| 长子县| 澳门| 弥渡县| 巴林左旗| 当涂县| 攀枝花市| 县级市| 丘北县| 平昌县| 西乌珠穆沁旗| 安乡县| 抚顺市| 富阳市| 乌恰县| 商都县| 邵阳市| 温宿县| 万全县| 齐齐哈尔市| 班戈县| 西乌珠穆沁旗| 新泰市| 万荣县| 安宁市| 禹州市| 原平市| 永康市| 崇仁县| 耒阳市|