閒得無事,看著知乎裡種種 Python 優點,按捺不住,裝起了 Python 3.4。
網上找了點爬行圖片的代碼,修改至兼容3.4,成功爬行指定url所有jpg圖片,代碼段如下:
import os
import re
import urllib
import urllib.request

# Crawl images: downloads are stored in a "test" folder next to this script.
download_path = os.path.dirname(os.path.abspath(__file__))


class spider(object):
    """Download every .jpg image referenced by a ``src`` attribute of a page.

    Typical use is ``spider(page_url).run()``, which fetches the page,
    extracts the image URLs with :meth:`parse` and saves them with
    :meth:`downloads`.
    """

    # Absolute http(s) URL ending in ".jpg" inside a src="..." attribute.
    # ``[^"]*?`` keeps each match inside its own attribute value, so several
    # images on one line are returned separately instead of being merged by
    # a greedy ``.*``. Trailing whitespace before the closing quote is
    # tolerated but not captured.
    _JPG_SRC = re.compile(r'src="(https?://[^"]*?\.jpg)\s*"')

    def __init__(self, url):
        """Remember the page *url* that :meth:`run` will crawl."""
        self.url = url

    def parse(self, content):
        """Return the list of .jpg URLs found in the HTML text *content*.

        URLs are returned in document order; duplicates are kept. An empty
        list is returned when nothing matches.
        """
        return self._JPG_SRC.findall(content)

    def downloads(self, urls):
        """Save every URL in *urls* into ``<download_path>/test``.

        The target directory is created when missing. Each file is named
        after the last path segment of its URL, so two URLs with the same
        file name overwrite each other. Errors raised by
        ``urllib.request.urlretrieve`` propagate to the caller.
        """
        d_path = os.path.join(download_path, "test")
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(d_path, exist_ok=True)
        for url in urls:
            filename = url.split("/")[-1]
            print(url)
            print("Downloads %s" % (filename))
            output = os.path.join(d_path, filename)
            urllib.request.urlretrieve(url, output)

    def run(self):
        """Fetch ``self.url``, extract its .jpg URLs and download them."""
        with urllib.request.urlopen(self.url) as fd:
            # Only ASCII URLs are extracted, so undecodable bytes elsewhere
            # in the page are replaced rather than aborting the whole run.
            content = fd.read().decode("UTF-8", errors="replace")
        self.downloads(self.parse(content))


if __name__ == "__main__":
    sp = spider("http://news.cnfol.com/img/20150814/17638.shtml")
    sp.run()
新聞熱點
疑難解答