代碼如下:
import socket
import re
'''
廣東省公安廳出入境政務服務網護照,通行證辦理進度查詢。
分析網址格式為 http://www.gdcrj.com/wsyw/tcustomer/tcustomer.do?&method=find&applyid=身份證號碼
構造socket請求網頁html,利用正則匹配出查詢結果
'''
def gethtmlbyidentityid(identityid):
    """Fetch the raw HTTP response for an application-progress query.

    Opens a plain TCP connection to www.gdcrj.com on port 80, sends a
    hand-built GET request for the progress page of ``identityid`` and
    reads the response until the server closes the connection.

    Args:
        identityid: The applicant identifier (str or int) placed in the
            ``applyid`` query parameter.

    Returns:
        The complete response (status line, headers and body) as a str.
        Bytes that are not valid UTF-8 are silently dropped.

    Raises:
        OSError: If name resolution, connecting, sending or receiving fails.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    host = 'www.gdcrj.com'
    port = 80
    suburl = '/wsyw/tcustomer/tcustomer.do?&method=find&applyid={0}'

    # Percent-encode the identifier so that spaces or CR/LF in the input
    # cannot break (or inject into) the request line.
    path = suburl.format(quote(str(identityid), safe=''))

    # HTTP lines must end in CRLF.  "Connection: close" asks the server to
    # close the socket after the response, which is what ends the recv loop
    # below; without it an HTTP/1.1 server may keep the connection open.
    message = (
        'GET {0} HTTP/1.1\r\n'
        'Host: {1}\r\n'
        'Connection: close\r\n'
        '\r\n'
    ).format(path, host)

    chunks = []
    # The context manager closes the socket even if an error is raised.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        remote_ip = socket.gethostbyname(host)
        s.connect((remote_ip, port))
        print('【INFO】:socket連接成功')
        s.sendall(message.encode('utf-8'))
        print('【INFO】:遠(yuǎn)程下載中...')
        while True:
            recev = s.recv(4096)
            if not recev:
                # Empty read: the peer closed the connection.
                break
            chunks.append(recev)
    print('【INFO】:遠(yuǎn)程下載網(wǎng)頁完成')

    # Decode once over the joined bytes so that a multi-byte character split
    # across two recv() chunks is not lost.
    # NOTE(review): assumes the page is served as UTF-8 -- confirm.
    return b''.join(chunks).decode('utf-8', errors='ignore')
'''
利用正則表達式從上步獲取的網頁html內容里找出查詢結果
'''
def getresultfromhtml(htmlstr):
    """Extract, print and return the result cells from the response HTML.

    Line breaks (with the indentation that follows them) are removed and
    runs of spaces are collapsed to one, then the text of every
    ``<td class="news_font">...</td>`` cell is collected.

    Args:
        htmlstr: The HTML (or full HTTP response) text to search.

    Returns:
        A list with the stripped text of each matching cell, in document
        order.  Each entry is also printed on its own line.  The list is
        empty when nothing matches.
    """
    # Drop each line break together with the leading whitespace of the next
    # line; the optional \r copes with CRLF line endings.
    linebreaks = re.compile(r'\r?\n\s*')
    space = re.compile(r' +')
    # [^<]+ takes everything up to the next tag.  A class such as [^<td]
    # would additionally reject the letters "t" and "d" inside the cell.
    result_reg = re.compile(r'<td class="news_font">([^<]+)</td>')

    # Strip line breaks and redundant spaces before matching.
    htmlstr = linebreaks.sub('', htmlstr)
    htmlstr = space.sub(' ', htmlstr)

    # Pull out the query results.
    results = [cell.strip() for cell in result_reg.findall(htmlstr)]
    for res in results:
        print(res)
    return results
if __name__ == '__main__':
    # Script entry point: prompt for an ID number, query the site and print
    # the result cells.
    identityid = input('輸入您的身份證號碼(僅限廣東省居民查詢):').strip()
    # Validate the text instead of converting it with int(): ID numbers may
    # end in the check character "X", which int() rejects.
    if re.fullmatch(r'\d+[Xx]?', identityid):
        print('【INFO】:開始查詢')
        try:
            html = gethtmlbyidentityid(identityid)
        except OSError as exc:
            # Network failures (DNS, connect, send/recv) are reported as
            # such rather than being mislabelled as invalid input.
            print('【ERROR】:網絡請求失敗:{0}'.format(exc))
        else:
            getresultfromhtml(html)
            print('【INFO】:查詢成功')
    else:
        print('【W(wǎng)ARN】:輸入非法')
    input('【INFO】:按任意鍵退出')
新聞熱點
疑難解答
圖片精選