国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > Python > 正文

Python實現批量將Word轉HTML並將HTML內容發布至網站的方法

2019-11-25 17:11:30
字體:
來源:轉載
供稿:網友

本文實例講述了Python實現批量將Word文檔轉換為HTML並將HTML內容發布至網站的方法,分享給大家參考。具體實現方法如下:

# coding=utf-8
"""Batch-convert Word documents to HTML and publish the HTML to a web site.

Two steps, run in order by the ``__main__`` guard:

1. ``wordsToHtml`` drives the Kingsoft WPS COM server to save every
   ``.doc``/``.docx`` below a directory as an ``.html`` file next to it.
2. ``html_add_to_db`` copies every ``.htm``/``.html`` file (and the images
   it references) into the web server's static directory and inserts one
   ``common_article`` row per file whose content is an ``<iframe>``
   pointing at the published copy.

The code runs on Python 2.7 and Python 3: byte strings are decoded
explicitly instead of relying on the Python 2-only ``unicode`` builtin.
"""
__author__ = 'zhm'

import os
import random
import re
import time

import MySQLdb
from win32com import client as wc

# Encoding used to decode file names and image paths. The original script
# hard-coded "gbk" (presumably it targets a Chinese-locale Windows host).
_SOURCE_ENCODING = "gbk"

# Numeric save format passed to ``Document.SaveAs``; 8 is ``wdFormatHTML``
# in the Word object model's WdSaveFormat enumeration.
_WD_FORMAT_HTML = 8

# D:/files is the static directory configured on the web server, so a
# local path minus its drive prefix is the URL path of the same file.
_HTML_TARGET_DIR = 'D:/files/htmls/'
_IMAGE_TARGET_DIR = 'D:/files/images/whzx/'

_MISSING_SUFFIX_MESSAGE = '********************ERROR: 未取得后綴名!'

# Captures the value of the src attribute of each <img> tag. It is a bytes
# pattern because the HTML is read in binary mode. (The published listing
# had "/s" where "\s" was meant, so the pattern could never match.)
_IMG_SRC_RE = re.compile(
    br"""<img\s.*?\s?src\s*=\s*['|"]?([^\s'"]+).*?>""", re.I)

# Wrapper stored in the database: an iframe showing the published page plus
# a script that resizes the iframe to the page height once it has loaded.
# NOTE(review): the whitespace inside these literals was lost in the
# collapsed listing and has been re-created; the markup itself is unchanged.
_IFRAME_PREFIX = '''
     <script type="text/javascript" language="javascript">
      function iFrameHeight() {
       var ifm= document.getElementById("iframepage");
       var subWeb = document.frames ? document.frames["iframepage"].document:ifm.contentDocument;
       if(ifm != null && subWeb != null) {
        ifm.height = subWeb.body.scrollHeight;
       }
      }
     </script>
     <iframe src='''
_IFRAME_SUFFIX = '''
      marginheight="0" marginwidth="0" frameborder="0" scrolling="no" width="765" height=100% id="iframepage" name="iframepage" onLoad="iFrameHeight()" ></iframe>
     '''


def _to_text(value):
    """Return *value* as text, decoding byte strings with the source encoding."""
    if isinstance(value, bytes):
        return value.decode(_SOURCE_ENCODING)
    return value


def _to_bytes(value):
    """Return *value* as bytes, encoding text with the source encoding."""
    if isinstance(value, bytes):
        return value
    return value.encode(_SOURCE_ENCODING)


def _unique_name(suffix):
    """Return ``<epoch milliseconds><random 100..10000><suffix>``."""
    millis = int(time.time() * 1000)
    return str(millis) + str(random.randint(100, 10000)) + suffix


def _web_path(local_path):
    """Drop the drive prefix (``D:``) from *local_path*."""
    return local_path.split(":", 1)[-1]


def _ensure_dir(path):
    """Create *path* (and its parents) when it does not exist yet."""
    if not os.path.exists(path):
        os.makedirs(path)


def _needs_copy(source, target):
    """Return True when *target* is missing or differs in size from *source*."""
    if not os.path.exists(target):
        return True
    return os.path.getsize(target) != os.path.getsize(source)


def _publish_images(html, source_dir):
    """Copy the images referenced by *html* and rewrite their ``src`` values.

    Args:
        html: raw bytes of the HTML document.
        source_dir: directory (text) that relative image paths resolve against.

    Returns:
        The HTML bytes with every copied image's ``src`` replaced by the
        web path of its published copy.
    """
    handled = set()
    for src in _IMG_SRC_RE.findall(html):
        if src in handled:
            # The first replace() already rewrote every occurrence.
            continue
        handled.add(src)
        image_path = os.path.join(source_dir, _to_text(src))
        if not os.path.isfile(image_path):
            # e.g. an absolute URL: leave the tag alone instead of crashing.
            print("Warning: image not found, src left unchanged: " + image_path)
            continue
        _ensure_dir(_IMAGE_TARGET_DIR)
        # Each image gets its own timestamp; the original reused the HTML
        # file's timestamp for every image.
        target = os.path.join(_IMAGE_TARGET_DIR, _unique_name('.png'))
        if _needs_copy(image_path, target):
            with open(image_path, 'rb') as reader, open(target, 'wb') as writer:
                writer.write(reader.read())
            html = html.replace(src, _to_bytes(_web_path(target)))
    return html


def _publish_html(cursor, html_path, title):
    """Publish one HTML file and insert its iframe wrapper as an article.

    Args:
        cursor: open database cursor used for the insert.
        html_path: full path (text) of the HTML file to publish.
        title: article title stored alongside the wrapper.
    """
    with open(html_path, 'rb') as source:
        content = source.read()
    content = _publish_images(content, os.path.dirname(html_path))

    _ensure_dir(_HTML_TARGET_DIR)
    target = os.path.join(_HTML_TARGET_DIR, _unique_name('.html'))
    if not _needs_copy(html_path, target):
        return
    with open(target, 'wb') as published:
        published.write(content)

    iframe_html = _IFRAME_PREFIX + _web_path(target) + _IFRAME_SUFFIX
    sql = "insert into common_article(title,content) values(%s,%s)"
    try:
        cursor.execute(sql, (title, iframe_html))
    except MySQLdb.Error as exc:
        # Best effort per file: report the failure and keep going.
        print("Error: unable to insert data: %s" % (exc,))


def wordsToHtml(dir):
    """Convert every Word document below *dir* to an HTML file beside it.

    Args:
        dir: root directory that is walked recursively.

    Only ``.doc``/``.docx`` files are opened. The office application is
    quit, and each document closed, even when a conversion raises.
    """
    # Kingsoft WPS automation: per the original author, the preview edition
    # registers as "KWPS" and the official release as "WPS".
    word = wc.Dispatch('KWPS.Application')
    try:
        for path, _subdirs, files in os.walk(dir):
            # Absolute path, so the COM server is not handed a path relative
            # to this script's working directory.
            source_dir = os.path.abspath(_to_text(path))
            for wordFile in files:
                name = _to_text(wordFile)
                stem, ext = os.path.splitext(name)
                if not ext:
                    print(_MISSING_SUFFIX_MESSAGE)
                    continue
                if ext.lower() not in ('.doc', '.docx'):
                    continue
                htmlFullName = os.path.join(source_dir, stem + ".html")
                doc = word.Documents.Open(os.path.join(source_dir, name))
                try:
                    doc.SaveAs(htmlFullName, _WD_FORMAT_HTML)
                finally:
                    doc.Close()
                print(u'生成了html文件:' + htmlFullName)
    finally:
        word.Quit()
    print("")
    print("Finished!")


def html_add_to_db(dir):
    """Publish every HTML file below *dir* and record it in the database.

    Args:
        dir: root directory that is walked recursively.

    Inserts are committed once, after the whole tree has been processed.
    The cursor and connection are closed even when processing raises.
    """
    conn = MySQLdb.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='root',
        db='test',
        charset='utf8',
    )
    try:
        cur = conn.cursor()
        try:
            for path, _subdirs, files in os.walk(dir):
                source_dir = _to_text(path)
                for htmlFile in files:
                    name = _to_text(htmlFile)
                    title, ext = os.path.splitext(name)
                    if not ext:
                        print(_MISSING_SUFFIX_MESSAGE)
                        continue
                    if ext.lower() not in ('.htm', '.html'):
                        continue
                    _publish_html(cur, os.path.join(source_dir, name), title)
        finally:
            cur.close()
        conn.commit()
    finally:
        # Close the database connection.
        conn.close()


if __name__ == '__main__':
    wordsToHtml('d:/word')
    html_add_to_db('d:/word')

希望本文所述對大家的Python程序設計有所幫助。

發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 丹阳市| 白城市| 北川| 钟祥市| 临沂市| 商城县| 汉寿县| 西城区| 呼伦贝尔市| 永吉县| 靖宇县| 玉门市| 象山县| 张家界市| 天津市| 柯坪县| 新昌县| 萝北县| 凤台县| 革吉县| 双鸭山市| 新晃| 海安县| 淮北市| 井冈山市| 民和| 灵山县| 鄄城县| 平舆县| 巨野县| 惠安县| 富平县| 双柏县| 仁布县| 绥德县| 大厂| 广州市| 河曲县| 时尚| 永平县| 长汀县|