国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > Python > 正文

Python模擬登陸淘寶並統計淘寶消費情況的代碼實例分享

2019-11-25 16:39:42
字體:
來源:轉載
供稿:網友

支付寶十年賬單上的數字有點嚇人,但它統計的項目太多,我只是想看看到底單純在淘寶上支出了多少,於是寫了段腳本,統計任意時間段淘寶訂單的消費情況,看那結果其實在淘寶上我還是相當節約的說。
腳本的主要工作是模擬了瀏覽器登錄,解析“已買到的寶貝”頁面以獲得指定的訂單及寶貝信息。

201674184737787.gif (410×235)

使用方法見代碼或執行命令加參數-h。腳本基於Python 2(使用了urllib2、cookielib等模塊),另外需要BeautifulSoup4支持,BeautifulSoup的官方項目列表頁:https://www.crummy.com/software/BeautifulSoup/bs4/download/

首先來說一下代碼使用方法:

python taobao.py -u USERNAME -p PASSWORD -s START-DATE -e END-DATE --verbose

所有參數均可選,如:

python taobao.py -u jinnlynn 

統計用戶jinnlynn所有訂單的情況

python taobao.py -s 2014-12-12 -e 2014-12-12

統計用戶(用戶名在命令執行時會要求輸入)在2014-12-12當天的訂單情況

python taobao.py --verbose

這樣就可以統計並輸出訂單明細。

好了,說了這麼多我們就來看代碼吧:

# -*- coding: utf-8 -*-
# The coding line above must stay on line 1 or 2 of the saved script: the
# source contains non-ASCII string literals and targets Python 2.
"""Log in to Taobao and summarise spending from the "bought items" pages.

Usage: python taobao.py -u USERNAME -p PASSWORD -s START-DATE -e END-DATE --verbose

All arguments are optional; the user name and password are prompted for when
omitted. Requires Python 2 (urllib2 / cookielib) and BeautifulSoup4.
"""
from __future__ import unicode_literals, print_function, absolute_import, division

import urllib
import urllib2
import urlparse
import cookielib
import re
import sys
import os
import json
import subprocess
import argparse
import platform
from getpass import getpass
from datetime import datetime
from pprint import pprint

try:
    from bs4 import BeautifulSoup
except ImportError:
    sys.exit('BeautifulSoup4 missing.')

__version__ = '1.0.0'
__author__ = 'JinnLynn'
__copyright__ = 'Copyright (c) 2014 JinnLynn'
__license__ = 'The MIT License'

# Headers sent with every request made through _request().
# NOTE(review): 'x-requestted-with', 'ContentType' and 'chartset' look like
# misspellings of the standard names. They are kept byte-for-byte because
# correcting them changes what the server receives (urllib2 currently supplies
# its own default Content-type for POSTs) -- confirm before changing.
HEADERS = {
    'x-requestted-with': 'XMLHttpRequest',
    'Accept-Language': 'zh-cn',
    'Accept-Encoding': 'gzip, deflate',
    'ContentType': 'application/x-www-form-urlencoded; chartset=UTF-8',
    'Cache-Control': 'no-cache',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.38 Safari/537.36',
    'Connection': 'Keep-Alive'
}

# Template for the login form; login_post() merges caller data over a copy.
DEFAULT_POST_DATA = {
    'TPL_username': '',  # user name
    'TPL_password': '',  # password
    'TPL_checkcode': '',
    'need_check_code': 'false',
    'callback': '0',  # any value makes the server answer with JSON
}

# Order states that are excluded from the spending totals.
INVALID_ORDER_STATES = [
    'CREATE_CLOSED_OF_TAOBAO',  # cancelled
    'TRADE_CLOSED',  # order closed
]

LOGIN_URL = 'https://login.taobao.com/member/login.jhtml'

# Encoding used for raw_input()/getpass() prompts.
RAW_IMPUT_ENCODING = 'gbk' if platform.system() == 'Windows' else 'utf-8'

# CSS classes marking an order row as an extra service rather than goods:
# freight insurance, value-added service, staged payment (deposit / balance).
_EXTRA_SERVICE_CLASSES = ('freight-info', 'service-info', 'stage-item')


def _request(url, data, method='POST'):
    """Open *url* with HEADERS and return the urllib2 response.

    *data* is a mapping (or None). It is url-encoded and sent as the request
    body for POST, or appended as the query string for GET.
    """
    if data:
        data = urllib.urlencode(data)
    if method == 'GET':
        if data:
            url = '{}?{}'.format(url, data)
        data = None
    req = urllib2.Request(url, data, HEADERS)
    return urllib2.urlopen(req)


def stdout_cr(msg=''):
    """Rewrite the current terminal line with *msg* (no trailing newline)."""
    # '\r' (carriage return) moves the cursor back to column 0. The published
    # listing had it mangled to '/r', which printed the two characters instead.
    sys.stdout.write('\r{:10}'.format(' '))
    sys.stdout.write('\r{}'.format(msg))
    sys.stdout.flush()


def get(url, data=None):
    """Issue a GET request; *data* becomes the query string."""
    return _request(url, data, method='GET')


def post(url, data=None):
    """Issue a POST request; *data* becomes the form body."""
    return _request(url, data, method='POST')


def login_post(data):
    """POST the login form and return the decoded JSON answer as a dict.

    *data* is merged over a copy of DEFAULT_POST_DATA, so the module-level
    template is never modified.
    """
    login_data = dict(DEFAULT_POST_DATA)
    login_data.update(data)
    res = post(LOGIN_URL, login_data)
    return json.load(res, encoding='gbk')


def login(usr, pwd):
    """Log in as *usr* / *pwd*; the process exits on an unrecoverable error.

    Session cookies end up in whatever opener is installed in urllib2 (main()
    installs one backed by a cookie jar).
    """
    data = {
        'TPL_username': usr.encode('utf-8' if platform.system() == 'Windows' else 'GB18030'),
        'TPL_password': pwd
    }

    # 1. Try to log in, retrying with a captcha for as long as the server
    #    answers with one of the codes this script treats as "captcha needed".
    ret = login_post(data)
    while not ret.get('state', False):
        code = ret.get('data', {}).get('code', 0)
        if code == 3425 or code == 1000:
            print('INFO: {}'.format(ret.get('message')))
            check_code = checkcode(ret.get('data', {}).get('ccurl'))
            data.update({'TPL_checkcode': check_code, 'need_check_code': 'true'})
            ret = login_post(data)
        else:
            sys.exit('ERROR. code: {}, message:{}'.format(code, ret.get('message', '')))

    token = ret.get('data', {}).get('token')
    print('LOGIN SUCCESS. token: {}'.format(token))

    # 2. Redirect
    # 2.1 Fetch the "st" value for the token.
    res = get('https://passport.alipay.com/mini_apply_st.js', {
        'site': '0',
        'token': token,
        'callback': 'stCallback4'})
    content = res.read()
    # '\S' (non-whitespace) was mangled to '/S' in the published listing, which
    # made the pattern match only a run of literal '/S' sequences.
    match = re.search(r'"st":"(\S*)"( |})', content)
    if match is None:
        sys.exit('ERROR. "st" value not found in the response.')
    st = match.group(1)

    # 2.2 Follow the redirect with the st value.
    # NOTE(review): 'TPL_uesrname' looks like a misspelling of 'TPL_username';
    # kept as-is because the server-side parameter name cannot be verified here.
    get('http://login.taobao.com/member/vst.htm',
        {'st': st, 'TPL_uesrname': usr.encode('GB18030')})


def checkcode(url):
    """Download the captcha image at *url*, show it, and return the typed code."""
    filename, _ = urllib.urlretrieve(url)
    if not filename.endswith('.jpg'):
        # Give the temp file an image extension so it opens in an image viewer.
        old_fn = filename
        filename = '{}.jpg'.format(filename)
        os.rename(old_fn, filename)
    if platform.system() == 'Darwin':
        # macOS: hand the file to `open`
        subprocess.call(['open', filename])
    elif platform.system() == 'Windows':
        # Windows: running the file through the shell opens the default program
        subprocess.call(filename, shell=True)
    else:
        # Other systems: print the file name and let the user open it
        print('打開該文件獲取驗證碼: {}'.format(filename))
    return raw_input('輸入驗證碼: '.encode(RAW_IMPUT_ENCODING))


def _fetch_soup(url):
    """Fetch *url* through the installed opener and parse the GBK-decoded page."""
    res = urllib2.urlopen(url)
    # NOTE(review): no parser is named, so bs4 picks whichever is installed;
    # left unchanged because the chosen parser can affect the parsed tree.
    return BeautifulSoup(res.read().decode('gbk'))


def _parse_row(row):
    """Turn one 'tr.order-bd' row into a dict describing a good or a service.

    Keys: name, url, spec, snapshot, price, quantity, is_goods. Raises when a
    goods row lacks an expected node; the caller records that as an error.
    """
    bb = {}
    row_classes = row.attrs['class']
    if any(ex in row_classes for ex in _EXTRA_SERVICE_CLASSES):
        # Extra service row: only the name is required.
        name_node = row.find('td', class_='baobei')
        bb['name'] = name_node.text.strip()
        bb['url'] = ''
        bb['spec'] = ''
        bb['snapshot'] = ''
        bb['price'] = 0.0
        bb['quantity'] = 1
        bb['is_goods'] = False
        try:
            bb['url'] = name_node.find('a').attrs['href']
            bb['price'] = float(row.find('td', class_='price').text)
        except Exception:
            # Deliberately best-effort: keep the defaults when the link or
            # the price is absent or unparsable.
            pass
    else:
        name_node = row.select('p.baobei-name a')
        bb['name'] = name_node[0].text.strip()
        # item URL
        bb['url'] = name_node[0].attrs['href']
        # item snapshot (second link, when present)
        bb['snapshot'] = ''
        if len(name_node) > 1:
            bb['snapshot'] = name_node[1].attrs['href']
        # item specification
        bb['spec'] = row.select('.spec')[0].text.strip()
        # unit price and quantity
        bb['price'] = float(row.find('td', class_='price').attrs['title'])
        bb['quantity'] = int(row.find('td', class_='quantity').attrs['title'])
        bb['is_goods'] = True
    return bb


def parse_bought_list(start_date=None, end_date=None):
    """Walk the "bought items" pages and return a list of order dicts.

    Each order has the keys pos, orderid, status, shop_name, shop_url, date,
    amount and baobei (a list of dicts as built by _parse_row). Orders dated
    after *end_date* are skipped, and paging stops at the first order dated
    before *start_date*; both comparisons are by calendar day. An order whose
    header cannot be parsed is recorded as an error and left out.
    """
    url = 'http://buyer.trade.taobao.com/trade/itemlist/list_bought_items.htm'

    stdout_cr('working... {:.0%}'.format(0))

    # 1. Parse the first page.
    soup = _fetch_soup(url)

    # 2. Read the paging information from the "jump to page" element.
    page_jump = soup.find('span', id='J_JumpTo')
    if page_jump is None:
        sys.exit('ERROR. paging information (J_JumpTo) not found in the page.')
    jump_url = page_jump.attrs['data-url']
    url_parts = urlparse.urlparse(jump_url)
    query_data = dict(urlparse.parse_qsl(url_parts.query))
    total_pages = int(query_data['tPage'])

    orders = []
    errors = []
    cur_page = 1
    out_date = False
    while True:
        bought_items = soup.find_all('tbody', attrs={'data-orderid': True})
        for count, item in enumerate(bought_items, 1):
            # Position of the order on the site: page.index
            pos = '{}.{}'.format(cur_page, count)
            info = {'pos': pos}
            try:
                info['orderid'] = item.attrs['data-orderid']
                info['status'] = item.attrs['data-status']

                # Shop
                node = item.select('tr.order-hd a.shopname')
                if not node:
                    # No shop: possibly a gifted lottery-ticket order; ignore.
                    continue
                info['shop_name'] = node[0].attrs['title'].strip()
                info['shop_url'] = node[0].attrs['href']

                # Date
                node = item.select('tr.order-hd span.dealtime')[0]
                info['date'] = datetime.strptime(node.attrs['title'], '%Y-%m-%d %H:%M')
                if end_date and info['date'].toordinal() > end_date.toordinal():
                    continue
                if start_date and info['date'].toordinal() < start_date.toordinal():
                    out_date = True
                    break

                # Goods / services
                baobei = []
                for row in item.find_all('tr', class_='order-bd'):
                    try:
                        baobei.append(_parse_row(row))
                        # The paid amount lives in a td that may span several
                        # rows, so look for it on every row.
                        amount_node = row.select('td.amount em.real-price')
                        if amount_node:
                            info['amount'] = float(amount_node[0].text)
                    except Exception as e:
                        errors.append({
                            'type': 'baobei',
                            'id': pos,
                            'node': '{}'.format(row),
                            'error': '{}'.format(e)
                        })
            except Exception as e:
                errors.append({
                    'type': 'order',
                    'id': pos,
                    'node': '{}'.format(item),
                    'error': '{}'.format(e)
                })
                # The order is incomplete (and `baobei` may be unset or belong
                # to the previous order), so it is not added to the result.
                continue

            if 'amount' not in info:
                # Keep the order printable; flag it when it would be counted.
                if info['status'] not in INVALID_ORDER_STATES:
                    errors.append({
                        'type': 'order',
                        'id': pos,
                        'node': '{}'.format(item),
                        'error': 'amount not found'
                    })
                info['amount'] = 0.0
            info['baobei'] = baobei
            orders.append(info)

        # max() guards against a reported page count of 0.
        stdout_cr('working... {:.0%}'.format(cur_page / max(total_pages, 1)))

        # Next page
        cur_page += 1
        if cur_page > total_pages or out_date:
            break
        query_data.update({'pageNum': cur_page})
        page_url = '{}?{}'.format(url, urllib.urlencode(query_data))
        soup = _fetch_soup(page_url)

    stdout_cr()
    if errors:
        print('INFO. 有錯誤發生,統計結果可能不準確。')
    return orders


def output(orders, start_date, end_date):
    """Print the spending summary for *orders*.

    Orders whose status is in INVALID_ORDER_STATES are only counted as
    invalid. An empty *orders* list is handled: the lines that depend on the
    oldest order are then skipped.
    """
    amount = 0.0
    org_amount = 0
    baobei_count = 0
    order_count = 0
    invaild_order_count = 0
    for order in orders:
        if order['status'] in INVALID_ORDER_STATES:
            invaild_order_count += 1
            continue
        amount += order.get('amount', 0.0)
        order_count += 1
        for baobei in order.get('baobei', []):
            if not baobei['is_goods']:
                continue
            org_amount += baobei['price'] * baobei['quantity']
            baobei_count += baobei['quantity']

    print('{:<9} {}'.format('累計消費:', amount))
    print('{:<9} {}/{}'.format('訂單/寶貝:', order_count, baobei_count))
    if invaild_order_count:
        print('{:<9} {} (退貨或取消等, 不在上述訂單之內)'.format('無效訂單:', invaild_order_count))
    print('{:<7} {}'.format('寶貝原始總價:', org_amount))
    print('{:<7} {:.2f}'.format('寶貝平均單價:', 0 if baobei_count == 0 else org_amount / baobei_count))
    print('{:<9} {} ({:.2%})'.format('節約了(?):',
                                     org_amount - amount,
                                     0 if org_amount == 0 else (org_amount - amount) / org_amount))

    # The last element of `orders` is the oldest order parsed.
    if start_date:
        from_date = start_date
    elif orders:
        from_date = orders[-1]['date']
    else:
        from_date = None
    to_date = end_date if end_date else datetime.now()
    if from_date is not None:
        print('{:<9} {:%Y-%m-%d} - {:%Y-%m-%d}'.format('統計區間:', from_date, to_date))
    if not start_date and orders:
        print('{:<9} {:%Y-%m-%d %H:%M}'.format('敗家始于:', orders[-1]['date']))


def ouput_orders(orders):
    """Print every order with its goods (extra-service rows are skipped)."""
    print('所有訂單:')
    if not orders:
        print(' --')
        return
    for order in orders:
        # Fall back to 0.0 so an order without a parsed amount still prints.
        fields = dict(order)
        fields.setdefault('amount', 0.0)
        print(' {:-^20}'.format('-'))
        print(' * 訂單號: {orderid} 實付款: {amount} 店鋪: {shop_name} 時間: {date:%Y-%m-%d %H:%M}'.format(**fields))
        for bb in order['baobei']:
            if not bb['is_goods']:
                continue
            print('  - {name}'.format(**bb))
            if bb['spec']:
                print('   {spec}'.format(**bb))
            print('   {price} X {quantity}'.format(**bb))


def _parse_date_arg(value):
    """Parse a YYYY-MM-DD argument; return None when unset, exit when invalid."""
    if not value:
        return None
    try:
        return datetime.strptime(value, '%Y-%m-%d')
    except Exception as e:
        sys.exit('ERROR. {}'.format(e))


def main():
    """Parse the command line, log in, then collect and print the statistics."""
    parser = argparse.ArgumentParser(
        prog='python {}'.format(__file__)
    )
    parser.add_argument('-u', '--username', help='淘寶用戶名')
    parser.add_argument('-p', '--password', help='淘寶密碼')
    parser.add_argument('-s', '--start', help='起始時間,可選, 格式如: 2014-11-11')
    parser.add_argument('-e', '--end', help='結束時間,可選, 格式如: 2014-11-11')
    parser.add_argument('--verbose', action='store_true', default=False, help='訂單詳細輸出')
    parser.add_argument('-v', '--version', action='version',
                        version='v{}'.format(__version__), help='版本號')
    args = parser.parse_args()

    usr = args.username
    if not usr:
        usr = raw_input('輸入淘寶用戶名: '.encode(RAW_IMPUT_ENCODING))
    usr = usr.decode('utf-8')  # byte string -> unicode (Chinese input handling)

    pwd = args.password
    if not pwd:
        if platform.system() == 'Windows':
            # Chinese prompt output is problematic on Windows; use the default.
            pwd = getpass()
        else:
            pwd = getpass('輸入淘寶密碼: '.encode('utf-8'))
    pwd = pwd.decode('utf-8')

    verbose = args.verbose

    start_date = _parse_date_arg(args.start)
    end_date = _parse_date_arg(args.end)
    if start_date and end_date and start_date > end_date:
        sys.exit('ERROR, 結束日期必須晚于或等于開始日期')

    # Cookies are kept in a per-user file next to the working directory.
    cj_file = './{}.tmp'.format(usr)
    cj = cookielib.LWPCookieJar()
    try:
        cj.load(cj_file)
    except (EnvironmentError, UnicodeError):
        # Best-effort: a missing or unreadable cookie file is not an error.
        pass
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj), urllib2.HTTPHandler)
    urllib2.install_opener(opener)

    login(usr, pwd)

    try:
        cj.save(cj_file)
    except (EnvironmentError, UnicodeError):
        # Best-effort: failing to persist cookies must not abort the run.
        pass

    orders = parse_bought_list(start_date, end_date)
    output(orders, start_date, end_date)

    # Per-order details
    if verbose:
        ouput_orders(orders)


if __name__ == '__main__':
    main()

發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 长丰县| 湘潭县| 博客| 桦甸市| 柳河县| 和静县| 长沙县| 留坝县| 涡阳县| 镇雄县| 林西县| 罗城| 文安县| 东丽区| 抚顺县| 霍州市| 芦溪县| 本溪| 阿克苏市| 乌鲁木齐县| 娄烦县| 章丘市| 开阳县| 察隅县| 虎林市| 奉化市| 巴塘县| 边坝县| 葵青区| 云龙县| 阿拉尔市| 阜南县| 屏东市| 保山市| 秭归县| 伊金霍洛旗| 京山县| 武威市| 平陆县| 沭阳县| 凤翔县|