本文主要分享關於 Python 登錄並爬取淘寶信息的相關代碼，還是挺不錯的，大家可以了解下。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Log in to Taobao with Selenium and save link texts from a list of pages.

The script reads "1.txt", where each line has the form
"<output filename>,<url>", opens every URL in Firefox and writes the text of
the matching links to the output file. Entries that yield no text are
appended to "error.txt".
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import datetime
import traceback
import logging
import os
from selenium.webdriver.common.action_chains import ActionChains
import codecs


def login(driver, site):
    """Open *site* and submit the login form with the hard-coded credentials.

    Any failure while locating or filling the form is reported (a "failure"
    line plus the traceback) and then swallowed, so the caller keeps running
    on a best-effort basis.
    """
    driver.get(site)
    time.sleep(5)
    try:
        # Click the "please log in" link.
        driver.find_element(By.CLASS_NAME, "h").click()
        time.sleep(5)
        # Enter the account name and password.
        driver.find_element(By.ID, "TPL_username_1").send_keys(u"yourusername")
        time.sleep(5)
        driver.find_element(By.ID, "TPL_password_1").send_keys(u"yourpsd")
        time.sleep(5)
        # Submit the login form and give the site time to finish.
        driver.find_element(By.ID, "J_SubmitStatic").click()
        time.sleep(30)
    except Exception:
        # Catch Exception (not a bare except) so Ctrl-C still interrupts.
        print(u"failure")
        traceback.print_exc()


def _save_link_texts(links, filename, site):
    """Write the stripped, non-empty texts of *links* to *filename*.

    One text is written per line. When nothing was collected, the entry
    "<filename>,<site>" is appended to "error.txt" instead.
    """
    lines = []
    for link in links:
        text = link.text
        if text != "":
            text = text.strip()
            print(text)
            lines.append(text + '\n')

    if lines:
        with codecs.open(filename, 'w', 'utf-8') as f:
            f.write("".join(lines))
    else:
        # Nothing scraped: record the entry so it can be retried later.
        strinfo = filename + "," + site
        print(strinfo)
        with codecs.open("error.txt", 'a', 'utf-8') as ferror:
            ferror.write(strinfo + '\n')


def crawlmarket(driver, filename, site):
    """Scrape the texts of ``a.J_ItemLink`` anchors on a "markets" page.

    If the first attempt finds no such anchors, log in and reload *site*
    before querying again. The driver is left open so the caller can reuse
    it for further pages.
    """
    xpath = "//a[@class='J_ItemLink']"
    driver.get(site)
    driver.maximize_window()
    time.sleep(10)
    driver.refresh()
    time.sleep(10)
    # No links found: log in, then go back to the target page.
    if not driver.find_elements(By.XPATH, xpath):
        login(driver, site)
        driver.get(site)
    time.sleep(30)
    # Query after the wait so the elements belong to the current page.
    links = driver.find_elements(By.XPATH, xpath)
    _save_link_texts(links, filename, site)


def crawltaobaosousuo(driver, filename, site):
    """Scrape the texts of ``a.J_ClickStat`` anchors on a search page.

    The driver is left open so the caller can reuse it for further pages.
    """
    driver.get(site)
    driver.maximize_window()
    time.sleep(10)
    driver.get(site)
    time.sleep(30)
    driver.refresh()
    links = driver.find_elements(By.XPATH, "//a[@class='J_ClickStat']")
    _save_link_texts(links, filename, site)


def jiexi(driver):
    """Process every "<filename>,<url>" entry of "1.txt" with *driver*.

    URLs containing "markets" go to ``crawlmarket``; all others go to
    ``crawltaobaosousuo``. Blank or malformed lines are skipped.
    """
    with codecs.open("1.txt", "r", "utf-8") as f:
        for line in f:
            # Split on the first comma only, so commas inside the URL survive.
            filename, sep, href = line.strip().partition(",")
            if not sep or not href:
                continue
            # Pause between pages, as the original script did.
            time.sleep(60)
            print(filename)
            if "markets" in href:
                crawlmarket(driver, filename, href)
            else:
                crawltaobaosousuo(driver, filename, href)


if __name__ == '__main__':
    driver = webdriver.Firefox()
    try:
        jiexi(driver)
    finally:
        # Quit once, after all pages, so the session is shared by every entry.
        driver.quit()

# 小結 (Summary)
如有改進策略，歡迎一起探討。此代碼可以抓取淘寶部分網頁內容，請根據自己的需求修改；注意可能會被風控。個人覺得不登錄的效果更好。
新聞熱點
疑難解答