京東商品詳情頁的請求處理流程是:先返回並顯示 html,然後再通過 ajax 請求取得並顯示價格。因此要拿到價格有兩種做法:
1. 運行頁面中的 js,再解析執行之後得到的 html
2. 模擬 js 發出的請求,直接得到價格
# -*- coding: utf-8 -*-
"""Fetch the price of a JD.com product from its item-page URL.

JD serves the item page as plain HTML first and then loads the price through
a separate ajax GET request, so the price is not part of the page itself.

1. The product data is embedded in the HTML in the following format (example):

# product: {
#     skuid: 1310118868,
#     name: '/u9999/u5f71/u77ed/u88d9/u4e24/u4ef6/u5957/u88c5/u5973/u0032/u0030/u0031/u0034/u51ac/u88c5/u65b0/u6b3e/u97e9/u7248/u957f/u8896/u0054/u6064/u4e0a/u8863/u8377/u53f6/u8fb9/u534a/u8eab/u88d9/u6f6e/u0020/u85cf/u9752/u0020/u004d',
#     skuidkey:'7781F505B71CE37A3AFBADA119D3587F',
#     href: 'http://item.jd.com/1310118868.html',
#     src: 'jfs/t385/197/414081450/336886/3070537b/541be890N2995990c.jpg',
#     cat: [1315,1343,1355],
#     brand: 18247,
#     nBrand: 18247,
#     tips: false,
#     type: 2,
#     venderId:38824,
#     shopId:'36786',
#     TJ:'0',
#     specialAttrs:["is7ToReturn-1"],
#     videoPath:'',
#     HM:'0'
# }

2. The ajax request that loads the price looks like this
   ('暫無報價' is the page's "no quote available" label):

# // get the numeric price
# var getPriceNum = function(skus, $wrap, perfix, callback) {
#     skus = typeof skus === 'string' ? [skus]: skus;
#     $wrap = $wrap || $('body');
#     perfix = perfix || 'J-p-';
#     $.ajax({
#         url: 'http://p.3.cn/prices/mgets?skuIds=J_' + skus.join(',J_') + '&type=1',
#         dataType: 'jsonp',
#         success: function (r) {
#             if (!r && !r.length) {
#                 return false;
#             }
#             for (var i = 0; i < r.length; i++) {
#                 var sku = r[i].id.replace('J_', '');
#                 var price = parseFloat(r[i].p, 10);
#
#                 if (price > 0) {
#                     $wrap.find('.'+ perfix + sku).html('¥' + r[i].p + '');
#                 } else {
#                     $wrap.find('.'+ perfix + sku).html('暫無報價');
#                 }
#
#                 if ( typeof callback === 'function' ) {
#                     callback(sku, price, r);
#                 }
#             }
#         }
#     });
# };
"""
import contextlib
import json
import re
import urllib

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

# NOTE(review): item pages are assumed to be GB-encoded (gb18030 is a superset
# of gb2312/gbk) -- confirm against the Content-Type the server actually sends.
_PAGE_ENCODING = 'gb18030'

_PRICE_URL_PREFIX = 'http://p.3.cn/prices/mgets?skuIds=J_'
_PRICE_URL_SUFFIX = '&type=1'

# Compiled once at import time instead of on every method call.
_PRODUCT_RE = re.compile(r'compatible: true,(.*?)};', re.S)
_SKUID_RE = re.compile(r'skuid: (.*?),')


def _to_text(raw, encoding):
    """Return *raw* as text, decoding only a Python 3 ``bytes`` payload.

    On Python 2 ``bytes`` is ``str``, so the payload is returned untouched and
    the historical behaviour is preserved.
    """
    if isinstance(raw, bytes) and not isinstance(raw, str):
        return raw.decode(encoding, 'ignore')
    return raw


class JdPrice(object):
    """Thin wrapper that looks up the price of a single JD.com product."""

    def __init__(self, url):
        """Download the item page at *url* and keep its HTML in ``self.html``."""
        self.url = url
        self._response = urlopen(self.url)
        try:
            raw_html = self._response.read()
        finally:
            # The response was previously left open for the object's lifetime.
            self._response.close()
        self.html = _to_text(raw_html, _PAGE_ENCODING)

    def get_product(self):
        """Return the raw product description embedded in the page HTML.

        The text is returned as-is (no parsing): everything between
        ``compatible: true,`` and the next ``};``.

        :return: the unparsed product description string
        :raises IndexError: if the page contains no product description
            (same exception type the original ``findall(...)[0]`` produced)
        """
        match = _PRODUCT_RE.search(self.html)
        if match is None:
            raise IndexError('product description not found in page')
        return match.group(1)

    def get_product_skuid(self):
        """Return the product's skuid, extracted from the product description.

        :return: the skuid as a string
        :raises IndexError: if the description or its ``skuid`` field is missing
        """
        match = _SKUID_RE.search(self.get_product())
        if match is None:
            raise IndexError('skuid not found in product description')
        return match.group(1)

    def get_product_name(self):
        """Not implemented yet; always returns ``None``."""
        return None

    def get_product_price(self):
        """Request the product price from the price endpoint using the skuid.

        :return: the value of the ``p`` field of the first returned entry, or
            ``None`` when the response is empty or the field is missing/empty
        """
        skuid = self.get_product_skuid()
        price_url = _PRICE_URL_PREFIX + skuid + _PRICE_URL_SUFFIX
        with contextlib.closing(urlopen(price_url)) as response:
            payload = _to_text(response.read(), 'utf-8')
        entries = json.loads(payload)
        if not entries:
            return None
        return entries[0].get('p') or None


# Demo / manual test
if __name__ == '__main__':
    url = 'http://item.jd.com/1044773.html'
    jp = JdPrice(url)
    print(jp.get_product_price())

# The original article continues below with a second example: a JD price crawler.
from contextlib import closing
import json
import urllib2

from BeautifulSoup import BeautifulSoup
from creepy import Crawler


class MyCrawler(Crawler):
    """Crawler that prints the title and price of every JD item page it visits."""

    def process_document(self, doc):
        """Print status, URL, product title and price for a fetched page.

        Pages whose status is not 200 are ignored.

        :param doc: the fetched document; this method reads its ``status``,
            ``url`` and ``text`` attributes
        """
        if doc.status != 200:
            return

        print('[%d] %s' % (doc.status, doc.url))

        # Re-encode the page from gb18030 to utf-8 before parsing; if that
        # fails, report the error and fall back to parsing the text as-is.
        try:
            soup = BeautifulSoup(doc.text.decode('gb18030').encode('utf-8'))
        except Exception as e:
            print(e)
            soup = BeautifulSoup(doc.text)
        print(soup.find(id="product-intro").div.h1.text)

        # The sku id is the file name of the item URL without its extension,
        # e.g. ".../982040.html" -> "982040".
        url_id = urllib2.unquote(doc.url).decode('utf8').split('/')[-1].split('.')[0]

        # closing() guarantees the response is released even if reading or
        # JSON parsing raises.
        price_url = 'http://p.3.cn/prices/get?skuid=J_' + url_id
        with closing(urllib2.urlopen(price_url, timeout=5)) as f:
            price = json.loads(f.read())
        print(price[0]['p'])


crawler = MyCrawler()
crawler.set_follow_mode(Crawler.F_SAME_HOST)
crawler.set_concurrency_level(16)
# NOTE(review): the dot is escaped so that only real file extensions match; the
# backslash appears to have been lost when the snippet was extracted -- confirm.
crawler.add_url_filter(r'\.(jpg|jpeg|gif|png|js|css|swf)$')
crawler.crawl('http://item.jd.com/982040.html')
新聞熱點
疑難解答
圖片精選