[python]抓取滬深股市交易龍虎榜數據
python 3.5.0下運行
沒做自動建立files文件夾,需要手動在py文件目錄下建立files文件夾後運行

# coding=utf-8
"""Download the daily trading "dragon and tiger list" files of the Shanghai
(SSE) and Shenzhen (SZSE) stock exchanges.

For every day from the start date up to and including today the script
requests one SSE JSON document and three SZSE text files and stores each
non-empty result under ``files/<date>_<board>.txt``.  The start date is read
from ``startDay.txt`` (format ``YYYYMMDD``); when that file is missing, empty
or unparsable the script starts 10 days back.  After the run ``startDay.txt``
is rewritten with today's date.

Written for Python 3.5, so no f-strings or variable annotations are used.
"""
import datetime
import gzip
import http.cookiejar
import json
import os
import time
import urllib.parse
import urllib.request
import zlib

# Directory (relative to the working directory) that receives the output.
OUTPUT_DIR = 'files'
# Seconds to wait for a server before giving up on one request.
REQUEST_TIMEOUT = 30
# How far back to start when no usable start date is configured.
DEFAULT_LOOKBACK_DAYS = 10

# Base request headers; 'Host' and 'Referer' are filled in per request.
header = {
    'Connection': 'Keep-Alive',
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate',
    'Host': '',
    'Referer': '',
}

# SSE query URL; the date is appended as YYYY-MM-DD (e.g. 2015-09-28).
shUrl = 'http://query.sse.com.cn/infodisplay/showTradePublicFile.do?dateTx='
# SZSE file URL prefixes; the date is appended as YYMMDD plus '.txt'.
szUrl = [
    'http://www.szse.cn/szseWeb/common/szse/files/text/jy/jy',  # 150923.txt
    'http://www.szse.cn/szseWeb/common/szse/files/text/smeTxt/gk/sme_jy',  # 150708.txt
    'http://www.szse.cn/szseWeb/common/szse/files/text/nmTxt/gk/nm_jy',  # 150902.txt
]
# Board names used in the output file names, in the same order as szUrl.
SZ_BOARD_NAMES = ['深證', '中小板', '創業板']

startFileName = r'startDay.txt'


def getOpener(head):
    """Return a urllib opener with cookie handling and default headers.

    head -- mapping of header name to header value; every pair is sent
            with each request made through the returned opener.
    """
    cookie_jar = http.cookiejar.CookieJar()
    processor = urllib.request.HTTPCookieProcessor(cookie_jar)
    opener = urllib.request.build_opener(processor)
    opener.addheaders = list(head.items())
    return opener


def ungzip(data):
    """Return *data* gunzipped, or unchanged when it is not valid gzip."""
    try:
        print('正在解壓.....')
        data = gzip.decompress(data)
        print('解壓完畢!')
    except (OSError, EOFError, zlib.error):
        # Not gzip (or truncated/corrupt): hand the bytes back untouched.
        print('未經壓縮, 無需解壓')
    return data


def writeFile(fname, data):
    """Write *data* to ``files/<fname>.txt`` unless that file already exists.

    The output directory is created on demand.  Text is written as UTF-8 so
    the result does not depend on the platform's locale encoding.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    filename = OUTPUT_DIR + '/' + fname + '.txt'
    if os.path.exists(filename):
        print('文件 ' + filename + ' 已存在,跳過')
    else:
        print('文件 ' + filename + ' 不存在,新建')
        with open(filename, 'w', encoding='utf-8') as out:
            out.write(data)
    print('文件:' + fname + ' 處理完畢。')


def _fetch(url, host, referer):
    """Return the raw response body of *url* fetched with the given headers."""
    # Copy the base headers instead of mutating the shared module-level dict.
    opener = getOpener(dict(header, Host=host, Referer=referer))
    with opener.open(url, timeout=REQUEST_TIMEOUT) as response:
        return response.read()


def _load_start_day(fallback):
    """Return the start date stored in startFileName, else *fallback*."""
    if not os.path.exists(startFileName):
        print('日期配置文件不存在,將從10日前日期開始讀取')
        return fallback
    print('日期配置文件存在,開始讀取')
    with open(startFileName, 'rt') as f:
        # strip() drops the trailing newline that would break strptime.
        text = f.readline().strip()
    if text == '':
        print('日期配置文件為空,將從10日前日期開始讀取')
        return fallback
    try:
        # Parse straight to a calendar date; a round trip through a UTC
        # timestamp would shift the day in timezones east of UTC.
        start = datetime.datetime.strptime(text, "%Y%m%d").date()
    except ValueError:
        print('日期配置文件格式錯誤,將從10日前日期開始讀取')
        return fallback
    print('將從日期:' + text + ' 開始讀取')
    return start


def _download_sh(day):
    """Fetch and store the SSE list for *day*; failures only skip the day."""
    day_text = day.strftime("%Y-%m-%d")
    url = shUrl + day_text
    print('讀取上證龍虎榜\n' + url)
    try:
        raw = _fetch(url, 'query.sse.com.cn',
                     'http://www.sse.com.cn/disclosure/diclosure/public/')
        # The request advertises gzip support, so decompress when needed.
        contents = json.loads(ungzip(raw).decode())['fileContents']
        if contents:
            text = ''.join(line + '\n' for line in contents)
            writeFile(day_text + '_上證', text)
    except Exception as err:
        # Best effort: report why this day was skipped and carry on.
        print(day_text + '跳過: ' + repr(err))


def _download_sz(day):
    """Fetch and store the three SZSE lists for *day*, each independently."""
    day_text = day.strftime("%Y-%m-%d")
    suffix = day.strftime("%y%m%d") + '.txt'
    for name, prefix in zip(SZ_BOARD_NAMES, szUrl):
        url = prefix + suffix
        print('讀取' + name + '龍虎榜\n' + url)
        try:
            raw = _fetch(url, 'www.szse.cn', 'http://www.szse.cn')
            writeFile(day_text + '_' + name, ungzip(raw).decode('gbk'))
        except Exception as err:
            # Best effort: report why this file was skipped and carry on.
            print(day_text + '跳過: ' + repr(err))


def main():
    """Download every day from the configured start date through today."""
    today = datetime.date.today()
    fallback = today - datetime.timedelta(days=DEFAULT_LOOKBACK_DAYS)
    day = _load_start_day(fallback)
    # An ordered comparison terminates even when the stored date lies in
    # the future, unlike a "not equal" test.
    while day <= today:
        print(day.strftime("%Y%m%d"))
        _download_sh(day)
        _download_sz(day)
        day += datetime.timedelta(days=1)
    # Remember today as the start date for the next run.
    print('設置最新日期')
    with open(startFileName, 'w') as f:
        f.write(today.strftime("%Y%m%d"))
    print('讀取完成')


if __name__ == '__main__':
    main()
新聞熱點
疑難解答